POJ 2050 Searching the Web 数据结构

这题基本上没有算法，只用了一个字符串的hash。
但代码很长，200+行。非常荣幸的1ac了！

#include <stdio.h>

#include <string.h>

#define MAX_LINES 2048

#define MAX_LINE_LEN 128

#define MAX_DOCS 128

#define MAX_WORDS 65536

#define MAX_WORDS_PER_LINE 128

struct node {

int doc, line;

struct node *next;

};

char lines[MAX_LINES][MAX_LINE_LEN];

int docs[MAX_DOCS][MAX_LINES];

int docs_cnt;

unsigned int occur[MAX_DOCS][(MAX_WORDS + 31) / 32];

unsigned int hash[MAX_WORDS];

struct node nodes[MAX_WORDS], *head[MAX_WORDS], *tail[MAX_WORDS];

int nodes_cnt;

inline unsigned int strhash(char *str, int len)

{

unsigned int h = 0;

while (len--) {

h *= 31;

if (*str >= 'A' && *str <= 'Z')

h += *str - 'A' + 'a';

else

h += *str;

str++;

}

return h;

}

inline unsigned int test_bit(unsigned int *arr, int idx)

{

return arr[idx >> 5] & (1 << (idx & 0x1f));

}

inline void set_bit(unsigned int *arr, int idx)

{

arr[idx >> 5] |= 1 << (idx & 0x1f);

}

inline int is_alpha(char ch)

{

return ch >= 'a' && ch <= 'z' ||

ch >= 'A' && ch <= 'Z'

;

}

inline int split(char *line, char **strs, int *lens)

{

int cnt = 0;

while (*line) {

while (*line && !is_alpha(*line))

line++;

if (!*line)

break;

*strs++ = line;

for (*lens = 0; is_alpha(*line); (*lens)++)

line++;

lens++;

cnt++;

}

return cnt;

}

inline int find(unsigned int h)

{

int i = h % MAX_WORDS;

while (hash[i] && hash[i] != h)

i = (i + 1) % MAX_WORDS;

return i;

}

inline void insert(char *str, int len, int doc, int line)

{

unsigned int h = strhash(str, len);

int i = find(h);

struct node *t = &nodes[nodes_cnt++];

hash[i] = h;

t->doc = doc;

t->line = line;

if (!head[i])

head[i] = t;

else

tail[i]->next = t;

tail[i] = t;

set_bit(occur[doc], i);

}

int output_cnt, last_doc;

inline void output(int doc, int line)

{

if (last_doc == -1)

last_doc = doc;

if (last_doc != doc) {

printf("----------\n");

last_doc = doc;

}

printf("%s\n", lines[docs[doc][line]]);

output_cnt++;

}

struct node *list[MAX_LINES];

inline void mark_one(int id, int doc)

{

struct node *t;

for (t = head[id]; t; t = t->next)

if (doc == -1 || t->doc == doc)

list[docs[t->doc][t->line]] = t;

}

inline void dump_list()

{

int i;

for (i = 0; i < MAX_LINES; i++)

if (list[i])

output(list[i]->doc, list[i]->line);

}

inline void search_one(int id)

{

memset(list, 0, sizeof(list));

mark_one(id, -1);

dump_list();

}

inline void search_and(int id1, int id2)

{

int i;

memset(list, 0, sizeof(list));

for (i = 0; i < docs_cnt; i++)

if (test_bit(occur[i], id1) && test_bit(occur[i], id2)) {

mark_one(id1, i);

mark_one(id2, i);

}

dump_list();

}

inline void search_or(int id1, int id2)

{

memset(list, 0, sizeof(list));

mark_one(id1, -1);

mark_one(id2, -1);

dump_list();

}

inline void search_not(int id)

{

int i, j;

for (i = 0; i < docs_cnt; i++)

if (!test_bit(occur[i], id))

for (j = 0; docs[i][j]; j++)

output(i, j);

}

int main()

{

int i, j, k, c, n, l = 1, lens[MAX_WORDS_PER_LINE], id[8];

char *strs[MAX_WORDS_PER_LINE];

freopen("e:\\in.txt", "r", stdin);

scanf("%d\n", &docs_cnt);

for (i = 0; i < docs_cnt; i++) {

for (j = 0;

gets(lines[l]), strcmp(lines[l], "**********");

j++, l++)

{

docs[i][j] = l;

c = split(lines[l], strs, lens);

for (k = 0; k < c; k++)

insert(strs[k], lens[k], i, j);

}

scanf("%d\n", &n);

while (n--) {

gets(lines[l]);

c = split(lines[l], strs, lens);

output_cnt = 0;

last_doc = -1;

for (i = 0; i < c; i++)

id[i] = find(strhash(strs[i], lens[i]));

if (c == 1)

search_one(id[0]);

else if (c == 2)

search_not(id[1]);

else if (strs[1][0] == 'A')

search_and(id[0], id[2]);

else

search_or(id[0], id[2]);

if (!output_cnt)

printf("Sorry, I found nothing.\n");

printf("==========\n");

}

return 0;

}

posted on 2010-07-23 13:24 糯米阅读(576) 评论(0) 编辑收藏引用所属分类: POJ

只有注册用户登录后才能发表评论。
【推荐】100%开源！大型工业跨平台软件C++源码提供，建模，组态！

相关文章: POJ 3123 Ticket to Ride 高效解法 POJ 3123 Ticket to Ride 动态规划+Minimal Steiner Tree Minimal Steiner Tree 简介 POJ 3122 Pie 二分 POJ 3121 The SetStack Computer 哈希 POJ 3120 Sudoku 搜索 POJ 3156 Interconnect 图论+数论 POJ 3155 Hard Life 最大密度子图 POJ 3154 Graveyard 模拟 POJ 3150 Cellular Automaton 矩阵乘法+二分

网站导航: 博客园 IT新闻 BlogJava 博问 Chat2DB 管理

糯米

POJ 2050 Searching the Web 数据结构

导航

随笔分类

Links

最新评论

阅读排行榜