字符串哈希

hash[l...r] = (hash[r] - hash[l - 1] * (p ^ (r - l + 1))) % mod;求中间字符串的哈希值

假设一个串s,那么字串s[i, j]的Hash值就是H[i, j]=s[i]+s[i+1]*x+s[i+2]*(x^2)+...+s[j]*(x^(i-j))。

https://vjudge.net/contest/242212#problem/B

哈希值有时和二分一起出题,这一题就是把每一个字符串转成一个hash值,每次找这个字符串的时候就二分去找这个字符串的hash值。

哈希值的乘数一般取131与13331,取模的数一般可以去1e9 + 7与1e9 + 9,有时候可能要用双哈希,就是两种取模值,两个都相同才算这个字符串相同、

A - Crazy Search

 POJ - 1200 

//求一个字符串中有多少个长度为n的子串,所以就用hash值来区分,然后题目还输入了只有几种字母,所以哈希的乘数可以就取这个数,把他当做一个x进制数。
#include<cstdio> #include<cstring> #include<algorithm> using namespace std; const int maxm = 16000000; int has[maxm]; char a[maxm]; int num[300]; int n, m; int main() { while(~scanf("%d%d", &n, &m)) { scanf("%s", a); int len = strlen(a); memset(has, 0, sizeof(has)); memset(num, 0, sizeof(num)); int cnt = 0; for(int i = 0; i < len; i++) { if(!num[a[i] ]) num[a[i] ] = cnt++; } int res = 0; for(int i = 0; i <= len - n; i++) { int sum = 0; for(int j = i; j < n + i; j++) { sum = sum * m + num[a[j] ]; } if(!has[sum]) { res++; has[sum] = 1; } } printf("%d ", res); } return 0; }

D - Subpalindromes

 URAL - 1989 

//这一题可以用bits或者线段树做,下面是树状数组的做法
//题目是要判断l到r是不是回文串,或者单点更新,回文串判断就是把字符翻转过来,取两个树状数组,然后在来比较hash值
#include <cstdio> #include <cstring> #include <algorithm> #define ULL unsigned long long using namespace std; const int N = 100005; int n, m, l, r, pos; char str[N], q[20], c; ULL C[N][2], Hash[N]; int lowbit(int x) { return x & (-x); } ULL Sum(int x, int y) { ULL ret = 0; while (x > 0) { ret += C[x][y]; x -= lowbit(x); } return ret; } void Add(int x, ULL d, int y) { while (x <= n) { C[x][y] += d; x += lowbit(x); } } int main() { Hash[0] = 1; for (int i = 1; i < N; i++) Hash[i] = Hash[i - 1] * 27;//都是小写字母 while (scanf("%s", str) == 1) { memset(C, 0, sizeof(C)); n = strlen(str); for (int i = 0; i < n; i++) { Add(i + 1, (str[i] - 'a') * Hash[i], 0); Add(i + 1, (str[n - i - 1] - 'a') * Hash[i], 1); } scanf("%d", &m); while (m--) { scanf("%s", q); if (q[0] == 'p') { scanf("%d%d", &l, &r); ULL h1 = (Sum(r, 0) - Sum(l - 1, 0)) * Hash[n - r]; ULL h2 = (Sum(n - l + 1, 1) - Sum(n - r, 1)) * Hash[l - 1];
//这个还额外乘一个hash值是因为头和尾剩余的串长度不同,所以要乘。如果是正向的,那么他其实有了前面l的数量级,而反向的就是后面r的数量级,所以就都把剩余的补上。
if (h1 == h2) printf("Yes "); else printf("No "); } else { scanf("%d %c", &pos, &c); int x = c - str[pos - 1]; Add(pos, x * Hash[pos - 1], 0); Add(n - pos + 1, x * Hash[n - pos], 1); str[pos - 1] = c; } } } return 0; }

 Subpalindromes

 URAL - 1989 

https://vjudge.net/contest/242212#problem/D

一个字符串是否能够分成三个回文串,hash做法。

#include<cstdio>
#include<cstring>
#include<algorithm>
#include<iostream>
#include<cmath>
//#define rep(i,l,r) for (int i=l;i<=r;i++)
#define maxn 200500
#define p 1000000007
using namespace std;
typedef unsigned long long ll;
ll a[maxn],b[maxn],k[maxn];
int c[maxn],d[maxn],t1,t2,cnt,n,t;
char s[maxn];
int main()
{
    scanf("%d",&t);
    k[0]=1;
//    rep(i,1,20000) k[i]=k[i-1]*p;
    for(int i = 1; i <= 20000; i++) k[i] = k[i - 1] * p;
    while (t--)
    {
        scanf("%s",s+1);
        n=strlen(s+1);
        t1=t2=cnt=b[n+1]=0;
        for (int i=1; i<=n; i++)
            a[i]=a[i-1]*p+s[i];
        for (int i=n; i>=1; i--)
            b[i]=b[i+1]*p+s[i];
        for (int i=1; i<=n; i++)
        {
            ll h=b[1]-b[i+1]*k[i];
            if (h==a[i])
                c[++t1]=i;
            h=a[n]-a[n-i]*k[i];
            if (h==b[n-i+1])
                d[++t2]=n-i+1;
        }
//对于字符串延伸到端点的字符串,可以这样计算。 reverse(c
+1,c+1+t1); int l,r,flag=0; // rep(i,1,t1) for(int i = 1; i <= t1; i++) { if (flag) break; // rep(j,1,t2) for(int j = 1; j <= t2; j++) { l=c[i]+1; r=d[j]-1; if (l>r||cnt>2e6) break; ++cnt; ll h1=a[r]-a[l-1]*k[r-l+1]; ll h2=b[l]-b[r+1]*k[r-l+1]; if (h1==h2) flag=1; } } if (flag) printf("Yes "); else printf("No "); } return 0; }

二维哈希模板

https://cn.vjudge.net/contest/299832#problem/K

题目是给出一个矩形模式串,然后给出一个矩形文本串,求模式串矩阵最多在文本串中出现多少次,直接二维哈希(蓝书上这题是用ac自动机做得,复杂度明显比不过哈希)。

#include<cstdio>
#include<iostream>
using namespace std;
#define maxn 2005
#define ULL unsigned long long
ULL maxSize = 4000007;
ULL seedx = 131;
ULL seedy = 13331;
struct HashString {
    ULL xp[maxn];
    ULL yp[maxn];
    ULL H[maxn][maxn];
    char s[maxn][maxn];
    int x,y;
    void init(int n, int m) {
        xp[0] = yp[0] = 1;
        x = n,y = m;
        for(int i = 1; i <= x; i++) xp[i] = xp[i - 1]*seedx;
        for(int i = 1; i <= y; i++) yp[i] = yp[i - 1]*seedy;
        return;
    }
    void initHash() {
        for(int i = 0; i < x; i++) {
            H[i][0] = s[i][0] - 'A' + 1;
            for(int j = 1; j < y; j++)
                H[i][j] = H[i][j - 1]*seedy + s[i][j] - 'A' + 1;
        }
        for(int i = 1; i < x; i++)//从1开始, 第0行是不变的
            for(int j = 0; j < y; j++)
                H[i][j] += H[i - 1][j]*seedx;
        return;
    }
    ULL askHash(int x1, int y1, int x2, int y2) { //询问以(x1, y1)为左上角, (x2, y2)为右上角的字符矩阵的hash值
        ULL ret = H[x2][y2];
        if(x1 > 0) ret -= H[x1 - 1][y2]*xp[x2 - x1 + 1];
        if(y1 > 0) ret -= H[x2][y1 - 1]*yp[y2 - y1 + 1];
        if(x1 > 0 && y1 > 0) ret += H[x1 - 1][y1 - 1]*xp[x2 - x1 + 1]*yp[y2 - y1 + 1];
        return ret;
    }
    void read()
    {
        for(int i=0;i<x;i++)
            scanf("%s",s[i]);
    }
}s1,s2;


int t;
int main() {
//    #ifdef ACBang
//        freopen("1.in","r",stdin);
//    #endif // ACBang
    int x,y,n,m;
    scanf("%d", &t);
    while(t--)
    {
        scanf("%d%d", &n, &m);
        s1.init(n,m);
        s1.read();
        s1.initHash();
        scanf("%d%d", &x, &y);
        s2.init(x,y);
        s2.read();
        s2.initHash();
        ULL tt = s2.askHash(0,0,x-1,y-1);
        int ans = 0;
        for(int i = 0;i<n-x+1;i++)
            {
                for(int j = 0;j<m-y+1;j++)
                {
                    if(tt == s1.askHash(i,j,i+x-1,j+y-1))
                      ans ++;
                }
            }
        printf("%d
",ans);
    }
    return 0;
}

求两个字符串的最长公共回文串长度。

https://ac.nowcoder.com/acm/contest/908/H

字符串哈希做法,可以用后缀数组做。

#include <bits/stdc++.h>
#define dbg(x) std::cerr << #x << " = " << x << "
"
using namespace std;
typedef long long ll;

const int N = 100000 + 10;
char x[N], y[N];
unsigned long long px[N], px1[N], py[N], pw[N];
int n, m;
unordered_map<unsigned long long, int> mp;

bool ok(int l, int r) {
    return (px[r] - px[l - 1] * pw[r - l + 1]) == (px1[l] - px1[r + 1] * pw[r - l + 1]);
}
bool check(int len) {
  //这里改成vector也是可行的,排序之后二分去找哈希值。 mp.clear();
for(int i = len; i <= n; i++) if(ok(i - len + 1, i)) mp[px[i] - px[i - len] * pw[len]] = 1; for(int i = len; i <= m; i++) { if(mp[py[i] - py[i - len] * pw[len]]) return 1; } return 0; } int main() { pw[0] = 1; for(int i = 1; i <= 100000; i++) pw[i] = pw[i - 1] * 131; while(~scanf("%s%s", (x + 1), (y + 1))) { n = strlen(x + 1), m = strlen(y + 1); for(int i = 1; i <= n; i++) { px[i] = px[i - 1] * 131 + x[i]; } for(int i = 1; i <= m; i++) py[i] = py[i - 1] * 131 + y[i]; for(int i = n; i >= 1; i--) px1[i] = px1[i + 1] * 131 + x[i]; int ans = 0; int l = 1, r = min(n, m) / 2, ans2 = 0; while(l <= r) { int mid = (l + r) >> 1; if(check(2 * mid)) ans2 = 2 * mid, l = mid + 1; else r = mid - 1; } ans = max(ans, ans2); l = 0, r = min(n - 1, m - 1) / 2, ans2 = 0; while(l <= r) { int mid = (l + r) >> 1; if(check(2 * mid + 1)) ans2 = 2 * mid + 1, l = mid + 1; else r = mid - 1; } ans = max(ans, ans2); printf("%d ", ans); } return 0; }

挂链哈希,更快

#include <bits/stdc++.h>
#define dbg(x) std::cerr << #x << " = " << x << "
"
using namespace std;
typedef long long ll;

const int N = 100000 + 10;
char x[N], y[N];
unsigned long long px[N], px1[N], py[N], pw[N];
int n, m;

bool ok(int l, int r) {
    return (px[r] - px[l - 1] * pw[r - l + 1]) == (px1[l] - px1[r + 1] * pw[r - l + 1]);
}
struct HashSet{
    static const int mo = 9997;
    int head[mo + 100],total;
    int nex[N];
    unsigned long long true_hash[N];

    void Clear() {
        memset(head,0,sizeof(head));
        total = 0;
    }
    void inset(unsigned long long ha) {
        int x = ha % mo;
        nex[++total] = head[x];
        true_hash[total] = ha;
        head[x] = total;
    }
    bool fid(unsigned long long ha) {
        int x = ha % mo;
        for(int i = head[x]; i; i = nex[i])
            if(true_hash[i] == ha) return true;
        return false;
    }
} mp;
bool check(int len) {
    mp.Clear();
    for(int i = len; i <= n; i++)
        if(ok(i - len + 1, i)) mp.inset(px[i] - px[i - len] * pw[len]);
    for(int i = len; i <= m; i++) {
        if(mp.fid(py[i] - py[i - len] * pw[len])) return true;
    }
    return false;
}
int main() {
    pw[0] = 1;
    for(int i = 1; i <= 100000; i++) pw[i] = pw[i - 1] * 131;
    while(~scanf("%s%s", (x + 1), (y + 1))) {
        n = strlen(x + 1), m = strlen(y + 1);
        for(int i = 1; i <= n; i++) {
            px[i] = px[i - 1] * 131 + x[i];
        }

        for(int i = 1; i <= m; i++)
            py[i] = py[i - 1] * 131 + y[i];
        for(int i = n; i >= 1; i--)
            px1[i] = px1[i + 1] * 131 + x[i];
        int ans = 0;
        int l = 1, r = min(n, m) / 2, ans2 = 0;
        while(l <= r) {
            int mid = (l + r) >> 1;
            if(check(2 * mid)) ans2 = 2 * mid, l = mid + 1;
            else r = mid - 1;
        }
        ans = max(ans, ans2);
        l = 0, r = min(n - 1, m - 1) / 2, ans2 = 0;
        while(l <= r) {
            int mid = (l + r) >> 1;
            if(check(2 * mid + 1)) ans2 = 2 * mid + 1, l = mid + 1;
            else r = mid - 1;
        }
        ans = max(ans, ans2);
        printf("%d
", ans);
    }
    return 0;
}

http://poj.org/problem?id=1743

一个序列,求变化趋势相同的子串,可以先差分一下,检查出现两次以上且不重合的最长长度。

这题正解是用 后缀自动机做。

用hash写主要是学一下挂链哈希。

#include<cstdio>
#include<cstring>
#include<queue>
#include<vector>
#include<algorithm>
#include<map>
#include<cstdlib>
#include<iostream>
using namespace std;
typedef unsigned long long ll;
const int seed = 131;
const int N = 2e4 + 5;
int n;
int a[N];
ll pw[N], px[N];
struct HashSet{
    static const int mo = 9997;
    int head[mo + 100],total;
    int next[N],pos[N];
    unsigned long long true_hash[N];

    void Clear() {
        memset(head,0,sizeof(head));
        total = 0;
    }
    bool Insert(unsigned long long ha,int _pos,int ans) {
        int x = ha % mo;
        for(int i = head[x];i;i = next[i])
            if(true_hash[i] == ha && _pos - pos[i] >= ans)
                return true;
        next[++total] = head[x];
        true_hash[total] = ha;
        pos[total] = _pos;
        head[x] = total;
        return false;
    }
} mp;
bool check(int x) {
    mp.Clear();
    for(int i = x; i <= n; i++) {
        if(mp.Insert(px[i] - px[i - x] * pw[x], i, x)) return true;
    }
    return false;
}

int main() {
    pw[0] = 1;
    for(int i = 1; i < N; i++) pw[i] = pw[i - 1] * seed;
    while(~scanf("%d", &n) && n) {
        for(int i = 1; i <= n; i++) scanf("%d", &a[i]);
        for(int i = 1; i < n; i++) a[i] = a[i + 1] - a[i] + 88;
        pw[0] = 1;
        n--;
        for(int i = 1; i <= n; i++) px[i] = (px[i - 1] * seed + a[i]);
        int l = 4, r = n / 2;
        int ans = 0;
        while(l <= r) {
            int mid = (l + r) >> 1;
            if(check(mid)) {
                l = mid + 1;
                ans = mid;
            }
            else r = mid - 1;
        }
        if(ans >= 4)printf("%d
", ans + 1);
        else printf("0
");
    }
    return 0;
}
原文地址:https://www.cnblogs.com/downrainsun/p/10447965.html