题目描述:
给定n 个字符串,求出现在不小于k 个字符串中的最长子串。
解题报告:
将n 个字符串连起来,中间用不相同的且没有出现在字符串中的字符隔开,求后缀数组。然后二分答案,将后缀分成若干组,判断每组的后缀是否出现在不小于k 个的原串中。
注意: 1、题目要求是超过一半有最大的公共子串,即 》 N / 2,并且 N = 1时直接输出;
总结: 1、DC3的后缀模板确实快。
2、本题的最后只要对得到的rank【】(即排名)排个序就行了,我就是这里wa了好久。
3、本题还是花了我不少时间,最后终于在POJ和Uva都AC了,也是一大安慰吧。鼓励一下。
#include <cstdio>
#include <memory.h>
#include <string.h>
#include <algorithm>
using namespace std;
const int maxn = 401000;
int ws[maxn],wa[maxn],wb[maxn],wv[maxn];
int sa[maxn],rank[maxn],height[maxn];
int a[maxn];
char s[maxn];
int indx[110],maxLen,ans[maxn],cnta;
bool vis[110];
//dc3
#define F(x) ((x)/3 + ((x)%3 == 1 ? 0:tb))
#define G(x) ((x) < tb ? (x)*3+1 : ((x)-tb)*3 + 2)
int c0(int *r, int a, int b)
{
return r[a] == r[b] && r[a+1] == r[b+1] && r[a+2] == r[b+2];
}
int c12(int k, int *r, int a,int b)
{
if (k == 2)
return r[a] < r[b] || r[a] == r[b] && c12(1,r,a+1,b+1);
else return r[a] < r[b] || r[a] == r[b] && wv[a+1] < wv[b+1];
}
void sort(int *r, int *a,int *b,int n,int m)
{
int i;
for (i = 0; i < n; i++) wv[i] = r[a[i]];
for (i = 0; i < m; i++) ws[i] = 0;
for (i = 0; i < n; i++) ws[wv[i]]++;
for (i = 1; i < m; i++) ws[i] += ws[i-1];
for (i = n-1; i >= 0; i--) b[--ws[wv[i]]] = a[i];
return ;
}
void dc3(int *r,int *sa,int n, int m)
{
int i,j,*rn = r + n, *san = sa + n;
int ta = 0, tb = (n+1)/3,tbc = 0, p;
r[n] = r[n+1] = 0;
for (i = 0; i < n; i++)
if (i%3 != 0)
wa[tbc++] = i;
sort(r+2, wa, wb, tbc, m);
sort(r+1, wb, wa, tbc, m);
sort( r, wa, wb, tbc, m);
for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
rn[F(wb[i])] = c0(r,wb[i-1],wb[i]) ? p-1 : p++;
if (p < tbc)
dc3(rn, san, tbc, p);
else for (i = 0; i < tbc; i++)
san[rn[i]] = i;
for (i = 0; i < tbc; i++)
if (san[i] < tb)
wb[ta++] = san[i]*3;
if (n%3 == 1)
wb[ta++] = n-1;
sort(r, wb, wa, ta, m);
for (i = 0; i < tbc; i++)
wv[wb[i]=G(san[i])] = i;
for (i = 0,j = 0,p = 0; i < ta && j < tbc; p++)
sa[p] = c12(wb[j]%3,r,wa[i],wb[j]) ? wa[i++] : wb[j++];
for ( ; i < ta; p++)
sa[p] = wa[i++];
for ( ; j < tbc; p++)
sa[p] = wb[j++];
return ;
}
void calHeight(int *r,int *sa,int n)
{
int i,j,k = 0;
for (i = 0; i <= n; i++)
rank[sa[i]] = i;
for (i = 0; i < n; height[rank[i++]] = k)
for (k ? k-- : 0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; k++)
;
return ;
}
bool exam(const int &lf,const int &rt,const int &k)
{
int i;
i = k;
while (lf < indx[i])
i--;
i++;
if (!vis[i] && rt < indx[i])
{
vis[i] = true;
return true;
}
return false;
}
bool check(int len,int n,int k)
{
int sum = 0,i,count = 0;
memset(vis,false,sizeof(vis));
//aprintf("len = %d/******************/\n",len);
for ( i = k; i <= n; i++)
{
if (height[i] < len)
{
if (sum > k/2)
{
ans[++count] = i-1;
}
memset(vis,false,sizeof(vis));
sum = 0;
}
else {
if (exam(sa[i],sa[i] + len - 1,k))
sum ++;
if (exam(sa[i-1],sa[i-1] + len - 1, k))
sum ++;
// printf("i = %d,sum = %d\n",i,sum);
}
}
if (sum > k/2)
{
ans[++count] = i-1;
//printf("i = %d,sum = %d\n",i,sum);
}
if (count > 0)
{
maxLen = len; cnta = count;
return true;
}
return false;
}
int main()
{
int n,k,i,j,len,maxch,mlen,cas = 1,h;
while (scanf("%d", &k) && k)
{
n = 0;
maxch = mlen = 0;
indx[0] = 0;
for (i = 1; i <= k; i++)
{
scanf("%s", s);
len = strlen(s);
if (mlen < len)
mlen = len;
for (j = 0; j < len; j++)
{
a[n++] = (int)s[j];
if (a[n-1] > maxch)
maxch = a[n-1];
}
a[n++] = 0;
indx[i] = n - 1;
}
if (cas != 1)
printf("\n");
cas ++;
if (k == 1)
{
printf("%s\n",s);
continue;
}
dc3(a,sa,n,maxch+1);
calHeight(a,sa,n-1);
/* for (i = 0; i < n; i++)
if(a[i] != 0)
printf("%c",(char)a[i]);
else printf("0");
printf("\n");
for (i = 0; i < n; i++)
printf("i = %d,sa = %d,rank = %d,h = %d\n",i,sa[i],rank[sa[i]],height[i]);*/
int l = 1, r = mlen + 1,mid;
maxLen = 0; cnta = -1;
while (l < r)
{
mid = (l + r) / 2;
if (check(mid,n-1,k))
l = mid + 1;
else r = mid;
}
if (maxLen <= 0)
printf("?\n");
else {
sort(ans+1,ans+1+cnta);
for (i = 1; i <= cnta; i++)
{
// printf("sa = %d\n",ans[i]);
for (j = 0,h = sa[ans[i]]; j < maxLen; j++)
printf("%c",(char)a[h+j]);
printf("\n");
}
}
}
return 0;
}