Rabin-Karp算法是由Rabin和Karp提出的一个在实际中有比较好应用的字符串匹配算法,此算法的预处理时间为O(m),但它的在最坏情况下的时间复杂度为O((2n-m+1)m),而平均复杂度接近O(m+n),此算法的主要思想就是通过对字符串进行哈稀运算,使得算法可以容易的派出 大量的不相同的字符串,假设模式字符串的长度为m,利用 Horner法则p = p[m] + 10(p[m -1] + 10(p[m-2]+...+10(p[2]+10p[1])...)),求出模式字符串的哈稀值p,而对于文本字符串来说,对应于每个长度为m的子串的 哈稀值为t(s+1)=10(t(s)-10^(m-1)T[s+1])+T[s+m+1],然后比较此哈稀值与模式字符串的哈稀值是否相等,若不相同, 则字符串一定不同,若相同,则需要进一步的按位比较,所以它的最坏情况下的时间复杂度为O(mn) 附上example: First attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | G | C | A | G | A | G | A | G | |
hash(y[0 .. 7]) = 17819 Second attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[1 .. 8]) = 17533 Third attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[2 .. 9]) = 17979 Fourth attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[3 .. 10]) = 19389 Fifth attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[4 .. 11]) = 17339 Sixth attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | | | G | C | A | G | A | G | A | G | |
hash(y[5 .. 12]) = 17597 Seventh attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[6 .. 13]) = 17102 Eighth attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[7 .. 14]) = 17117 Ninth attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[8 .. 15]) = 17678 Tenth attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[9 .. 16]) = 17245 Eleventh attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[10 .. 17]) = 17917 Twelfth attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[11 .. 18]) = 17723 Thirteenth attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[12 .. 19]) = 18877 Fourteenth attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[13 .. 20]) = 19662 Fifteenth attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[14 .. 21]) = 17885 Sixteenth attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G | |
hash(y[15 .. 22]) = 19197 Seventeenth attempt | G | C | A | T | C | G | C | A | G | A | G | A | G | T | A | T | A | C | A | G | T | A | C | G | | | G | C | A | G | A | G | A | G |
hash(y[16 .. 23]) = 16961 所以有8个字符匹配 该方法可扩展为二维,即寻找是否有匹配的矩阵。 即只需要将矩阵转化为一个值: A[0][0]*P2*Q2 A[0][1]*P1 *Q2 A[0][2]*P0 *Q2 A[0][0]*P2 *Q1 A[0][1]*P1 *Q1 A[0][2]*P0*Q1 A[0][0]*P2 *Q0 A[0][1]*P1 *Q0 A[0][2]*P0 *Q0 这样将矩阵的所有值相加就得到了最后的key值,枚举矩阵的每一个位置,看形成的矩阵的key是否与目标矩阵相同,如果相同就判断其中每一元素是否一样,不相同则不可能匹配,继续下一位置。 在一维的情况下,对于每次动态更新要 tmp = ((tmp – x[i – m] * powP[m - 1] ) * P + x[i]) % mod; 在二维的情况下,首先对于大的矩阵(要在其中查找是否匹配的矩阵),要对每个列进行求值,然后累加,并且在更新行的时候只需 tmp = ((tmp – x[i – m] * powP[m - 1] ) * P + x[i]) % mod;(与上面相同) 更新列的时候: For(t = 1 -> n) x[t] = ((x[t] – T[i – m + 1][t] * powP[m - 1]) * P + T[i][t]) % mod; 对每一列都向下更新一次。 HDOJ 3531 #include <iostream> using namespace std; const int P( 23); const int maxn( 1001); const int mod( 2033); int powP[maxn]; int m, n; int F[maxn][maxn], T[maxn][maxn]; int x[maxn]; int dest; int judge(int x, int y) { for(int i1(1), i2(x - m + 1); i1 <= m; i1++, i2++) { for(int j1(1), j2(y - m + 1); j1 <= m; j1++, j2++) if(F[i2][j2] != T[i1][j1]) return 0; } return 1; } int main() { powP[0] = 1; for(int i(1); i < maxn ; i++) powP[i] = (powP[i - 1] * P) % mod; while(scanf("%d %d", &n, &m) == 2) { for(int i(1); i <= n; i++) for(int j(1); j <= n; j++) scanf("%d", &F[i][j]); for(int i(1); i <= m; i++) for(int j(1); j <= m; j++) scanf("%d", &T[i][j]); x[0] = 0; dest = 0; for(int i(1); i <= m; i++) { x[i] = 0; for(int j(1); j <= m; j++) x[i] = (x[i] * P + T[j][i]) % mod; dest = (dest * P + x[i]) % mod; } for(int i(1); i <= n; i++) { x[i] = 0; for(int j(1); j <= m; j++) x[i] = (x[i] * P + F[j][i]) % mod; } int sum = 0, flag = 0; for(int i(m); i <= n; i++) { sum = 0; for(int j(1); j <= n; j++) { if(j < m) { sum = (P * sum + x[j]) % mod; } else { sum = ((sum - x[j - m] * powP[m - 1]) * P + x[j] + mod) % mod; if((sum + mod) % mod == dest) { if(judge(i, j)) { flag = 1; break; } } } } if(i <= n - 1) { for(int j(1); j <= n; j++) x[j] = ((x[j] - F[i - m + 1][j] * powP[m - 1]) * P + F[i + 1][j] + mod) % mod; } if(flag) break; } if(flag) puts("Yes"); else puts("No"); } return 0; } |