PageRank
Time Limit: 10 Sec Memory Limit: 256 MB
Submissions: 168 Solved: 8
Description
Google is famous over the world, much of the reason for their success lies with the effective search result for their user. In the final, support this effective is the technique called PageRank which largely used the huge url link network. A simple example is a hyperlink in Page A linked to Page B is viewed as A give a vote of support to B.
For us, PageRank is a complex technique, to simplify the problem, let’s analysis a simple model first. For every hyperlink, we just give 2 kind of attribute, the tag on the hyperlink, and the target url hyperlink point. So a hyperlink is viewed as a tag vote to an url.
As a user of search engine, each time we search, we will offer an keywork. Now the problem is for each given keywork, check all given hyperlink, if the tag on hyperlink contain this keywork, we count it’s a effective vote, then list the top 10 most voted url. (if 2 urls get the same votes, we sort it with lexicographic order)
Input
First line is N, indicate N hyperlinks. (1<=N<=20000)
Next N lines:
Each line contain two string, the first is the tag of hyperlink and the second is the url.
We guarantee that tags just contain lowcase and the length is less than 7, and urls just not contain blank characters(blank spaces, tabs and line break characters), the length of urls is less than 30.
Next line is Q, indicate Q times search. (1<=Q<=50000)
Next Q lines:
Each line just contain a string, indicate the keyword offered by user.
Output
For each search, list the top10 most voted urls, and a blankspace and the vote number.(if less then 10 urls be voted, just list all). If no url get votes, just output “Sorry, no result satisfied." In a single line.
For the answer of difference search, output a blank line between them.(Never leave a redundant blank line before the end of file)
Important hint, huge output, printf is recommended.
Sample Input
10
hustacm http://acm.hust.edu.cn
acmicpc http://acm.hust.edu.cn
acm http://acm.hust.edu.cn
hoj http://acm.hust.edu.cn/thx
vjudge http://acm.hust.edu.cn/vt
forum http://acm.hust.edu.cn/forum
hhoj http://acm.hust.edu.cn/vt
isun http://acm.hust.edu.cn/vt
sempr http://acm.hust.edu.cn/thx
gaewah http://acm.hust.edu.cn/forum
6
acm
m
hoj
zy
j
h
Sample Output
http://acm.hust.edu.cn 3
http://acm.hust.edu.cn 3
http://acm.hust.edu.cn/forum 1
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn/vt 1
Sorry, no result satisfied.
http://acm.hust.edu.cn/vt 2
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn 1
http://acm.hust.edu.cn/forum 1
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn/vt 1
HINT
Source
HONG Zehua / HUST Monthly 2011.04.09
繁琐的字符串插入查找,Trie 灵活应用,因为空间问题,用了一级指针,二级指针,链表。
预先开一个字符串buffer,用于存储 tag 和 url,其它保存 url 的地方只保存相应指针。
先根据 url 字典序将输入的 < tag,url > 排序,利用 Trie 统计对于同一个 url 的所有tag的所有子串对此 url 的 vote,同一tag的相同子串不要重复计数,通过在 Trie 中加入mask实现。
Trie 中节点只保存本关键字投票的 Vote 链表的指针,若此节点对某 url 有投票,则 Vote 链表非空,且只保存得票最高的前 10 名。
程序用时 1932 MS。
1
#include <stdio.h>
2
#include <string.h>
3
#include <stdlib.h>
4
5
6
#define N 20009
7
#define INF 0x3f3f3f3f
8
9
#define STR_LEN 35
10
#define BUF_TM (N+N)
11
12
char buf[ BUF_TM ][ STR_LEN ];
13
int buf_end;
14
15
#define buf_init() buf_end = 0
16
#define buf_new() buf[ buf_end++ ]
17
18
19
struct __Data
20

{
21
char *tag, *url;
22
};
23
typedef struct __Data Data;
24
25
int cmpData( const void *a, const void *b )
{
26
return strcmp( ((Data*)a)->url, ((Data*)b)->url );
27
}
28
Data data[ N ];
29
30
31
#define VOTE_TM 3200000
32
#define VOTE_LIM 10
33
34
struct __Vote
35

{
36
char *pSz;
37
int vote;
38
struct __Vote *next;
39
};
40
typedef struct __Vote Vote;
41
Vote vote_mem[ VOTE_TM ];
42
int vote_end;
43
44
#define vote_init() vote_end = 0
45
#define vote_bef(a,b) ( ((a)->vote > (b)->vote) || (((a)->vote == (b)->vote)&&(strcmp((a)->pSz,(b)->pSz)<=0)) )
46
47
Vote* vote_new()
{
48
Vote *p = vote_mem + vote_end++;
49
memset( p, 0, sizeof(Vote) );
50
return p;
51
}
52
53
54
#define TRIE_TC 26
55
#define TRIE_TM 2000000
56
57
struct __Trie
58

{
59
int flag, mask, cnt;
60
Vote *pVote;
61
struct __Trie *ch[ TRIE_TC ];
62
};
63
typedef struct __Trie Trie;
64
Trie trie_mem[ TRIE_TM ];
65
int trie_end;
66
67
#define trie_init() trie_end = 0
68
69
Trie* trie_new()
{
70
Trie *p = trie_mem + trie_end++;
71
memset( p, 0, sizeof(Trie) );
72
return p;
73
}
74
75
void trie_addcnt( Trie **pRoot, char *p, int mask )
{
76
for ( ; ; )
{
77
if ( (*pRoot) == NULL )
{
78
(*pRoot) = trie_new();
79
}
80
if ( *p )
{
81
pRoot = &( (*pRoot)->ch[ (*p) - 'a' ] );
82
++p;
83
}
84
else
{
85
if ( (*pRoot)->mask != mask )
{
86
(*pRoot)->mask = mask;
87
++((*pRoot)->cnt);
88
}
89
return;
90
}
91
}
92
}
93
94
int trie_getcnt( Trie *root, char *p )
{
95
for ( ; ; )
{
96
if ( root == NULL )
{
97
return 0;
98
}
99
if ( *p )
{
100
root = root->ch[ (*p) - 'a' ];
101
++p;
102
}
103
else
{
104
return root->cnt;
105
}
106
}
107
return 0;
108
}
109
110
void trie_insert( Trie **pRoot, char *p, char *url, int vote, int mask )
{
111
for ( ; ; )
{
112
if ( (*pRoot) == NULL )
{
113
(*pRoot) = trie_new();
114
}
115
if ( *p )
{
116
pRoot = &( (*pRoot)->ch[ (*p) - 'a' ] );
117
++p;
118
}
119
else
{
120
(*pRoot)->cnt = 0;
121
if ( (*pRoot)->mask != mask )
{
122
Vote *pred, **ppVote, *nv;
123
int cnt;
124
125
(*pRoot)->flag = 1;
126
(*pRoot)->mask = mask;
127
128
ppVote = &((*pRoot)->pVote);
129
if ( (*ppVote) == NULL )
{
130
(*ppVote) = vote_new();
131
(*ppVote)->vote = INF;
132
}
133
134
nv = vote_new();
135
nv->pSz = url;
136
nv->vote = vote;
137
138
pred = (*ppVote);
139
ppVote = &(pred->next);
140
while ( (*ppVote) != NULL )
{
141
if ( vote_bef(pred,nv) && vote_bef(nv,(*ppVote)) )
{
142
if ( strcmp( nv->pSz, (*ppVote)->pSz ) != 0 )
{
143
nv->next = pred->next;
144
pred->next = nv;
145
}
146
break;
147
}
148
pred = (*ppVote);
149
ppVote = &(pred->next);
150
}
151
152
if ( (*ppVote) == NULL )
{
153
(*ppVote) = nv;
154
}
155
156
cnt = 1;
157
for ( nv = (*pRoot)->pVote->next; (nv!=NULL)&&(cnt<VOTE_LIM); nv=nv->next )
{
158
++cnt;
159
}
160
if ( nv != NULL )
{
161
nv->next = NULL;
162
}
163
}
164
return;
165
}
166
}
167
}
168
169
Trie* trie_find( Trie *root, char *p )
{
170
for ( ; ; )
{
171
if ( root == NULL )
{
172
return NULL;
173
}
174
if ( *p )
{
175
root = root->ch[ (*p) - 'a' ];
176
++p;
177
}
178
else
{
179
return root;
180
}
181
}
182
}
183
184
void solve( Trie *root, char *pSz )
{
185
Trie *p = trie_find( root, pSz );
186
Vote *i;
187
if ( (p==NULL) || (!p->flag) )
{
188
printf( "Sorry, no result satisfied.\n" );
189
return;
190
}
191
for ( i = p->pVote->next; i!=NULL; i=i->next )
{
192
printf( "%s %d\n", i->pSz, i->vote );
193
}
194
}
195
196
int main()
{
197
char tmp[ STR_LEN ];
198
int n, mask = 10, i, j, k, x, y, vote;
199
Trie *root = NULL;
200
201
vote_init();
202
trie_init();
203
buf_init();
204
scanf( "%d", &n );
205
for ( i = 0; i < n; ++i )
{
206
data[ i ].tag = buf_new();
207
data[ i ].url = buf_new();
208
scanf( "%s%s", data[ i ].tag, data[ i ].url );
209
}
210
qsort( data, n, sizeof(data[0]), cmpData );
211
212
i = j = 0;
213
do
{
214
while ( (j<n) && (strcmp(data[j].url,data[i].url)==0) )
{
215
++j;
216
}
217
218
for ( k = i; k < j; ++k )
{
219
++mask;
220
for ( x = 0; data[k].tag[x]; ++x )
{
221
for ( y = x; data[k].tag[y]; ++y )
{
222
tmp[ y-x ] = data[k].tag[y];
223
tmp[ y-x+1 ] = 0;
224
trie_addcnt( &root, tmp, mask );
225
}
226
}
227
}
228
229
for ( k = i; k < j; ++k )
{
230
++mask;
231
for ( x = 0; data[k].tag[x]; ++x )
{
232
for ( y = x; data[k].tag[y]; ++y )
{
233
tmp[ y-x ] = data[k].tag[y];
234
tmp[ y-x+1 ] = 0;
235
vote = trie_getcnt( root, tmp );
236
if ( vote > 0 )
{
237
trie_insert( &root, tmp, data[i].url, vote, mask );
238
}
239
}
240
}
241
}
242
243
i = j;
244
} while ( j < n );
245
246
scanf( "%d", &n );
247
while ( n-- > 0 )
{
248
scanf( "%s", tmp );
249
solve( root, tmp );
250
if ( n > 0 )
{
251
printf( "\n" );
252
}
253
}
254
return 0;
255
}
256