PageRank
Time Limit: 10 Sec Memory Limit: 256 MB
Submissions: 168 Solved: 8
Description
Google is famous over the world, much of the reason for their success lies with the effective search result for their user. In the final, support this effective is the technique called PageRank which largely used the huge url link network. A simple example is a hyperlink in Page A linked to Page B is viewed as A give a vote of support to B.
For us, PageRank is a complex technique, to simplify the problem, let’s analysis a simple model first. For every hyperlink, we just give 2 kind of attribute, the tag on the hyperlink, and the target url hyperlink point. So a hyperlink is viewed as a tag vote to an url.
As a user of search engine, each time we search, we will offer an keywork. Now the problem is for each given keywork, check all given hyperlink, if the tag on hyperlink contain this keywork, we count it’s a effective vote, then list the top 10 most voted url. (if 2 urls get the same votes, we sort it with lexicographic order)
Input
First line is N, indicate N hyperlinks. (1<=N<=20000)
Next N lines:
Each line contain two string, the first is the tag of hyperlink and the second is the url.
We guarantee that tags just contain lowcase and the length is less than 7, and urls just not contain blank characters(blank spaces, tabs and line break characters), the length of urls is less than 30.
Next line is Q, indicate Q times search. (1<=Q<=50000)
Next Q lines:
Each line just contain a string, indicate the keyword offered by user.
Output
For each search, list the top10 most voted urls, and a blankspace and the vote number.(if less then 10 urls be voted, just list all). If no url get votes, just output “Sorry, no result satisfied." In a single line.
For the answer of difference search, output a blank line between them.(Never leave a redundant blank line before the end of file)
Important hint, huge output, printf is recommended.
Sample Input
10
hustacm http://acm.hust.edu.cn
acmicpc http://acm.hust.edu.cn
acm http://acm.hust.edu.cn
hoj http://acm.hust.edu.cn/thx
vjudge http://acm.hust.edu.cn/vt
forum http://acm.hust.edu.cn/forum
hhoj http://acm.hust.edu.cn/vt
isun http://acm.hust.edu.cn/vt
sempr http://acm.hust.edu.cn/thx
gaewah http://acm.hust.edu.cn/forum
6
acm
m
hoj
zy
j
h
Sample Output
http://acm.hust.edu.cn 3
http://acm.hust.edu.cn 3
http://acm.hust.edu.cn/forum 1
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn/vt 1
Sorry, no result satisfied.
http://acm.hust.edu.cn/vt 2
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn 1
http://acm.hust.edu.cn/forum 1
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn/vt 1
HINT
Source
HONG Zehua / HUST Monthly 2011.04.09
繁琐的字符串插入查找,Trie 灵活应用,因为空间问题,用了一级指针,二级指针,链表。
预先开一个字符串buffer,用于存储 tag 和 url,其它保存 url 的地方只保存相应指针。
先根据 url 字典序将输入的 < tag,url > 排序,利用 Trie 统计对于同一个 url 的所有tag的所有子串对此 url 的 vote,同一tag的相同子串不要重复计数,通过在 Trie 中加入mask实现。
Trie 中节点只保存本关键字投票的 Vote 链表的指针,若此节点对某 url 有投票,则 Vote 链表非空,且只保存得票最高的前 10 名。
程序用时 1932 MS。
1#include <stdio.h>
2#include <string.h>
3#include <stdlib.h>
4
5
6#define N 20009
7#define INF 0x3f3f3f3f
8
9#define STR_LEN 35
10#define BUF_TM (N+N)
11
12char buf[ BUF_TM ][ STR_LEN ];
13int buf_end;
14
15#define buf_init() buf_end = 0
16#define buf_new() buf[ buf_end++ ]
17
18
19struct __Data
20{
21 char *tag, *url;
22};
23typedef struct __Data Data;
24
25int cmpData( const void *a, const void *b ) {
26 return strcmp( ((Data*)a)->url, ((Data*)b)->url );
27}
28Data data[ N ];
29
30
31#define VOTE_TM 3200000
32#define VOTE_LIM 10
33
34struct __Vote
35{
36 char *pSz;
37 int vote;
38 struct __Vote *next;
39};
40typedef struct __Vote Vote;
41Vote vote_mem[ VOTE_TM ];
42int vote_end;
43
44#define vote_init() vote_end = 0
45#define vote_bef(a,b) ( ((a)->vote > (b)->vote) || (((a)->vote == (b)->vote)&&(strcmp((a)->pSz,(b)->pSz)<=0)) )
46
47Vote* vote_new() {
48 Vote *p = vote_mem + vote_end++;
49 memset( p, 0, sizeof(Vote) );
50 return p;
51}
52
53
54#define TRIE_TC 26
55#define TRIE_TM 2000000
56
57struct __Trie
58{
59 int flag, mask, cnt;
60 Vote *pVote;
61 struct __Trie *ch[ TRIE_TC ];
62};
63typedef struct __Trie Trie;
64Trie trie_mem[ TRIE_TM ];
65int trie_end;
66
67#define trie_init() trie_end = 0
68
69Trie* trie_new() {
70 Trie *p = trie_mem + trie_end++;
71 memset( p, 0, sizeof(Trie) );
72 return p;
73}
74
75void trie_addcnt( Trie **pRoot, char *p, int mask ) {
76 for ( ; ; ) {
77 if ( (*pRoot) == NULL ) {
78 (*pRoot) = trie_new();
79 }
80 if ( *p ) {
81 pRoot = &( (*pRoot)->ch[ (*p) - 'a' ] );
82 ++p;
83 }
84 else {
85 if ( (*pRoot)->mask != mask ) {
86 (*pRoot)->mask = mask;
87 ++((*pRoot)->cnt);
88 }
89 return;
90 }
91 }
92}
93
94int trie_getcnt( Trie *root, char *p ) {
95 for ( ; ; ) {
96 if ( root == NULL ) {
97 return 0;
98 }
99 if ( *p ) {
100 root = root->ch[ (*p) - 'a' ];
101 ++p;
102 }
103 else {
104 return root->cnt;
105 }
106 }
107 return 0;
108}
109
110void trie_insert( Trie **pRoot, char *p, char *url, int vote, int mask ) {
111 for ( ; ; ) {
112 if ( (*pRoot) == NULL ) {
113 (*pRoot) = trie_new();
114 }
115 if ( *p ) {
116 pRoot = &( (*pRoot)->ch[ (*p) - 'a' ] );
117 ++p;
118 }
119 else {
120 (*pRoot)->cnt = 0;
121 if ( (*pRoot)->mask != mask ) {
122 Vote *pred, **ppVote, *nv;
123 int cnt;
124
125 (*pRoot)->flag = 1;
126 (*pRoot)->mask = mask;
127
128 ppVote = &((*pRoot)->pVote);
129 if ( (*ppVote) == NULL ) {
130 (*ppVote) = vote_new();
131 (*ppVote)->vote = INF;
132 }
133
134 nv = vote_new();
135 nv->pSz = url;
136 nv->vote = vote;
137
138 pred = (*ppVote);
139 ppVote = &(pred->next);
140 while ( (*ppVote) != NULL ) {
141 if ( vote_bef(pred,nv) && vote_bef(nv,(*ppVote)) ) {
142 if ( strcmp( nv->pSz, (*ppVote)->pSz ) != 0 ) {
143 nv->next = pred->next;
144 pred->next = nv;
145 }
146 break;
147 }
148 pred = (*ppVote);
149 ppVote = &(pred->next);
150 }
151
152 if ( (*ppVote) == NULL ) {
153 (*ppVote) = nv;
154 }
155
156 cnt = 1;
157 for ( nv = (*pRoot)->pVote->next; (nv!=NULL)&&(cnt<VOTE_LIM); nv=nv->next ) {
158 ++cnt;
159 }
160 if ( nv != NULL ) {
161 nv->next = NULL;
162 }
163 }
164 return;
165 }
166 }
167}
168
169Trie* trie_find( Trie *root, char *p ) {
170 for ( ; ; ) {
171 if ( root == NULL ) {
172 return NULL;
173 }
174 if ( *p ) {
175 root = root->ch[ (*p) - 'a' ];
176 ++p;
177 }
178 else {
179 return root;
180 }
181 }
182}
183
184void solve( Trie *root, char *pSz ) {
185 Trie *p = trie_find( root, pSz );
186 Vote *i;
187 if ( (p==NULL) || (!p->flag) ) {
188 printf( "Sorry, no result satisfied.\n" );
189 return;
190 }
191 for ( i = p->pVote->next; i!=NULL; i=i->next ) {
192 printf( "%s %d\n", i->pSz, i->vote );
193 }
194}
195
196int main() {
197 char tmp[ STR_LEN ];
198 int n, mask = 10, i, j, k, x, y, vote;
199 Trie *root = NULL;
200
201 vote_init();
202 trie_init();
203 buf_init();
204 scanf( "%d", &n );
205 for ( i = 0; i < n; ++i ) {
206 data[ i ].tag = buf_new();
207 data[ i ].url = buf_new();
208 scanf( "%s%s", data[ i ].tag, data[ i ].url );
209 }
210 qsort( data, n, sizeof(data[0]), cmpData );
211
212 i = j = 0;
213 do {
214 while ( (j<n) && (strcmp(data[j].url,data[i].url)==0) ) {
215 ++j;
216 }
217
218 for ( k = i; k < j; ++k ) {
219 ++mask;
220 for ( x = 0; data[k].tag[x]; ++x ) {
221 for ( y = x; data[k].tag[y]; ++y ) {
222 tmp[ y-x ] = data[k].tag[y];
223 tmp[ y-x+1 ] = 0;
224 trie_addcnt( &root, tmp, mask );
225 }
226 }
227 }
228
229 for ( k = i; k < j; ++k ) {
230 ++mask;
231 for ( x = 0; data[k].tag[x]; ++x ) {
232 for ( y = x; data[k].tag[y]; ++y ) {
233 tmp[ y-x ] = data[k].tag[y];
234 tmp[ y-x+1 ] = 0;
235 vote = trie_getcnt( root, tmp );
236 if ( vote > 0 ) {
237 trie_insert( &root, tmp, data[i].url, vote, mask );
238 }
239 }
240 }
241 }
242
243 i = j;
244 } while ( j < n );
245
246 scanf( "%d", &n );
247 while ( n-- > 0 ) {
248 scanf( "%s", tmp );
249 solve( root, tmp );
250 if ( n > 0 ) {
251 printf( "\n" );
252 }
253 }
254 return 0;
255}
256