Rabin Karp 算法

部分转载

Rabin-Karp算法

Rabin-Karp算法是由Rabin和Karp提出的一个在实际中有比较好应用的字符串匹配算法，此算法的预处理时间为O(m)，但它的在最坏情况下的时间复杂度为O((2n-m+1)m)，而平均复杂度接近O(m+n)，此算法的主要思想就是通过对字符串进行哈稀运算，使得算法可以容易的派出大量的不相同的字符串，假设模式字符串的长度为m，利用
Horner法则p = p[m] + 10(p[m -1] + 10(p[m-2]+...+10(p[2]+10p[1])...))，求出模式字符串的哈稀值p,而对于文本字符串来说，对应于每个长度为m的子串的哈稀值为t(s+1)=10(t(s)-10^(m-1)T[s+1])+T[s+m+1]，然后比较此哈稀值与模式字符串的哈稀值是否相等，若不相同，则字符串一定不同，若相同，则需要进一步的按位比较，所以它的最坏情况下的时间复杂度为O（mn）

附上example：

First attempt

hash(y[0 .. 7]) = 17819

Second attempt

hash(y[1 .. 8]) = 17533

Third attempt

hash(y[2 .. 9]) = 17979

Fourth attempt

hash(y[3 .. 10]) = 19389

Fifth attempt

hash(y[4 .. 11]) = 17339

Sixth attempt

hash(y[5 .. 12]) = 17597

Seventh attempt

hash(y[6 .. 13]) = 17102

Eighth attempt

hash(y[7 .. 14]) = 17117

Ninth attempt

hash(y[8 .. 15]) = 17678

Tenth attempt

hash(y[9 .. 16]) = 17245

Eleventh attempt

hash(y[10 .. 17]) = 17917

Twelfth attempt

hash(y[11 .. 18]) = 17723

Thirteenth attempt

hash(y[12 .. 19]) = 18877

Fourteenth attempt

hash(y[13 .. 20]) = 19662

Fifteenth attempt

hash(y[14 .. 21]) = 17885

Sixteenth attempt

hash(y[15 .. 22]) = 19197

Seventeenth attempt

hash(y[16 .. 23]) = 16961

所以有8个字符匹配

该方法可扩展为二维，即寻找是否有匹配的矩阵。

即只需要将矩阵转化为一个值：

A[0][0]*P²*Q² A[0][1]*P¹ *Q² A[0][2]*P⁰*Q²

A[0][0]*P²*Q¹ A[0][1]*P¹ *Q¹ A[0][2]*P⁰*Q¹

A[0][0]*P² *Q⁰ A[0][1]*P¹ *Q⁰ A[0][2]*P⁰*Q⁰

这样将矩阵的所有值相加就得到了最后的key值，枚举矩阵的每一个位置，看形成的矩阵的key是否与目标矩阵相同，如果相同就判断其中每一元素是否一样，不相同则不可能匹配，继续下一位置。

在一维的情况下，对于每次动态更新要

tmp = ((tmp – x[i – m] * powP[m - 1] ) * P + x[i]) % mod;

在二维的情况下，首先对于大的矩阵（要在其中查找是否匹配的矩阵），要对每个列进行求值，然后累加，并且在更新行的时候只需

tmp = ((tmp – x[i – m] * powP[m - 1] ) * P + x[i]) % mod;(与上面相同)

更新列的时候：

For(t = 1 -> n)

x[t] = ((x[t] – T[i – m + 1][t] * powP[m - 1]) * P + T[i][t]) % mod;

对每一列都向下更新一次。

HDOJ 3531

#include <iostream>

using namespace std;

const int P( 23);

const int maxn( 1001);

const int mod( 2033);

int powP[maxn];

int m, n;

int F[maxn][maxn], T[maxn][maxn];

int x[maxn];

int dest;

int judge(int x, int y)

{

for(int i1(1), i2(x - m + 1); i1 <= m; i1++, i2++)

{

for(int j1(1), j2(y - m + 1); j1 <= m; j1++, j2++)

if(F[i2][j2] != T[i1][j1]) return 0;

}

return 1;

}

int main()

{

powP[0] = 1;

for(int i(1); i < maxn ; i++)

powP[i] = (powP[i - 1] * P) % mod;

while(scanf("%d %d", &n, &m) == 2)

{

for(int i(1); i <= n; i++)

for(int j(1); j <= n; j++)

scanf("%d", &F[i][j]);

for(int i(1); i <= m; i++)

for(int j(1); j <= m; j++)

scanf("%d", &T[i][j]);

x[0] = 0;

dest = 0;

for(int i(1); i <= m; i++)

{

x[i] = 0;

for(int j(1); j <= m; j++)

x[i] = (x[i] * P + T[j][i]) % mod;

dest = (dest * P + x[i]) % mod;

}

for(int i(1); i <= n; i++)

{

x[i] = 0;

for(int j(1); j <= m; j++)

x[i] = (x[i] * P + F[j][i]) % mod;

}

int sum = 0, flag = 0;

for(int i(m); i <= n; i++)

{

sum = 0;

for(int j(1); j <= n; j++)

{

if(j < m)

{

sum = (P * sum + x[j]) % mod;

}

else

{

sum = ((sum - x[j - m] * powP[m - 1]) * P + x[j] + mod) % mod;

if((sum + mod) % mod == dest)

{

if(judge(i, j))

{

flag = 1;

break;

}

if(i <= n - 1)

{

for(int j(1); j <= n; j++)

x[j] = ((x[j] - F[i - m + 1][j] * powP[m - 1]) * P + F[i + 1][j] + mod) % mod;

}

if(flag) break;

}

if(flag) puts("Yes");

else puts("No");

}

return 0;

}

posted on 2010-08-18 08:34 wujiawei 阅读(497) 评论(0) 编辑收藏引用

只有注册用户登录后才能发表评论。
【推荐】100%开源！大型工业跨平台软件C++源码提供，建模，组态！



网站导航: 博客园 IT新闻 BlogJava 博问 Chat2DB 管理

wujiawei@HIT

导航

留言簿

随笔档案

阅读排行榜

评论排行榜

常用链接

统计

最新评论

Rabin Karp 算法