HDU 5769 Substring（后缀数组）

Substring

Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 65536/65536 K (Java/Others)
Total Submission(s): 1570 Accepted Submission(s): 618

Problem Description

?? is practicing his program skill, and now he is given a string, he has to calculate the total number of its distinct substrings.
But ?? thinks that is too easy, he wants to make this problem more interesting.
?? likes a character X very much, so he wants to know the number of distinct substrings which contains at least one X.
However, ?? is unable to solve it, please help him.

Input

The first line of the input gives the number of test cases T;T test cases follow.
Each test case consists of 2 lines:
First line is a character X, and second line is a string S.
X is a lowercase letter, and S contains lowercase letters(‘a’-‘z’) only.

T<=30
1<=|S|<=10^5
The sum of |S| in all the test cases is no more than 700,000.

Output

For each test case, output one line containing “Case #x: y”(without quotes), where x is the test case number(starting from 1) and y is the answer you get for that case.

Sample Input

2 
a 
abc 
b 
bbb

Sample Output

Case #1: 3 
Case #2: 3
Hint



In first case, all distinct substrings containing at least one a: a, ab, abc. 
In second case, all distinct substrings containing at least one b: b, bb, bbb.

Author

FZU

分析：题目中求包含特定字母的不重复子串，我们知道，求不重复子串是后缀数组的基本运用，可以先求出总的不重复子串。

然后求不包含该字母的不重复子串，最后两者相减，即为所求。

计算不包含该字母的不重复子串的时候，可以先求出不包含该字母的总子串数.

去重的时候，对排名相邻的两个后缀，只减去公共前缀中不包含特定字母的那一部分，即 min(height[i], 排名为 i 的后缀的起点到其右边最近的特定字母的距离)。

这样的话，我们需要用一个数组记录每个位置右边（含自身）最近的特定字母的位置；若右边没有该字母，则记为字符串长度。

代码如下：

#include <cstdio>
#include <iostream>
#include <algorithm>
#include <cstring>
typedef long long ll;
using namespace std;
// Capacity shared by every global buffer below.
const int MAXN=200010;
// Filled by main(): r[i] is the index of the nearest occurrence of the chosen
// character at or to the right of i, or the string length when there is none.
// cmp(), da() and calheight() each take a parameter that is also named r and
// shadows this array inside those functions.
int r[MAXN];
// Scratch space for da(): wa and wb are reached through the pointers x and y,
// which are swapped every doubling round; wv holds the keys being sorted in the
// current round; Ws holds the counting-sort buckets.
int wa[MAXN],wb[MAXN],wv[MAXN],Ws[MAXN];
// Input string of the current test case, read by main().
char str[MAXN];
// Returns 1 when positions a and b carry the same pair of keys
// (r[a], r[a+l]) and (r[b], r[b+l]), and 0 otherwise.
// The second half is only inspected when the first half already matches.
int cmp(int *r,int a,int b,int l)
{
      if(r[a]!=r[b])
            return 0;
      return r[a+l]==r[b+l];
}
void da(const char r[],int sa[],int n,int m)  //n为len+1,m一般比数组中最大的数大一点即可
{
      int i,j,p,*x=wa,*y=wb,*t;
      for(i=0; i<m; i++) Ws[i]=0;
      for(i=0; i<n; i++) Ws[x[i]=r[i]]++;
      for(i=1; i<m; i++) Ws[i]+=Ws[i-1];
      for(i=n-1; i>=0; i--) sa[--Ws[x[i]]]=i;
      for(j=1,p=1; p<n; j*=2,m=p)
      {
            for(p=0,i=n-j; i<n; i++) y[p++]=i;
            for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
            for(i=0; i<n; i++) wv[i]=x[y[i]];
            for(i=0; i<m; i++) Ws[i]=0;
            for(i=0; i<n; i++) Ws[wv[i]]++;
            for(i=1; i<m; i++) Ws[i]+=Ws[i-1];
            for(i=n-1; i>=0; i--) sa[--Ws[wv[i]]]=y[i];
            for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1; i<n; i++)
                  x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
      }
      return;
}
// sa[k] is the start index of the suffix with rank k, and Rank[p] is the rank
// of the suffix starting at p, so the two arrays are inverse to each other.
// height[k] is the length of the common prefix of the suffixes ranked k-1 and k.
// main() builds sa over len+1 characters, so sa[0] is the suffix that consists
// of the terminator only and the real suffixes occupy ranks 1..len.
int sa[MAXN],Rank[MAXN],height[MAXN];

// Fills Rank[] and height[] from a suffix array built over n+1 characters
// (the string plus its terminator).
//   r  : the string; r[n] must differ from every r[0..n-1] so that the
//        prefix comparison below stops at the end of the string.
//   sa : suffix array with sa[0] being the terminator-only suffix.
//   n  : length of the string without the terminator.
// Afterwards height[k] is the length of the common prefix of the suffixes
// ranked k-1 and k, for k in 1..n.
void calheight(const char *r,int *sa,int n)
{
      int i,j;
      int k=0;

      for(i=1; i<=n; i++)
            Rank[sa[i]]=i;

      for(i=0; i<n; i++)
      {
            // Moving from suffix i-1 to suffix i loses at most one matched
            // character, so the scan resumes from k-1 instead of 0.
            if(k)
                  k--;
            j=sa[Rank[i]-1];
            while(r[i+k]==r[j+k])
                  k++;
            height[Rank[i]]=k;
      }
}
int main()
{
  int t,len,Case=0;;
  char ch;
  scanf("%d",&t);
  while(t--)
  {
      Case++;
      ll ans2=0;
      memset(r,0,sizeof(r));
      scanf(" %c",&ch);
      scanf("%s",str);
      len=strlen(str);
      int rr=len;
      for(int i=len-1;i>=0;i--)
      {
         if(str[i]!=ch)
         {
           r[i]=rr;
         }
         else
         {
           ll h=(rr-i-1);
           rr=i;
           r[i]=rr;
           ans2+=h*(h+1)/2;
         }
      }
      ans2+=rr*(rr+1)/2;
    //  cout<<ans2<<endl;
      da(str,sa,len+1,130);
      calheight(str,sa,len);
       for(int i=2;i<=len;i++)
      {
        ans2-=min(height[i],r[sa[i]]-sa[i]);
      }
      ll ans=(long long)len*(len+1)/2;
      for(int i=2;i<=len;i++)
      {
        ans-=height[i];
      }
    //  cout<<ans<<" "<<ans2<<endl;
       printf("Case #%d: %lld
",Case,ans-ans2);
  }
return 0;
}