HOJ1065--EntropyAnalysis:This problem requires that we find the optimal coding strategy for a given string,and compare the length of the code by bits with the length when using the ASCII(8-bit) code.Apparently,we know that the optimal coding strategy will be the
Huffman Coding.
Huffman Coding has been proved to be the most efficient way of coding,in terms of prefix-free encoding.The idea is quite simple.Given a set of characters,and the frequncies of appearance in the given string(or we can find that out easily if it's not given),we constantly apply the following rule to encode the whole thing:First we find the two characters with the lowest frequencies,and let them be the children in a binary tree,with their father having the frequency equal to the sum of the two frequencies.Then we keep doing this until we build a
Binary Tree,which is called in this case,a Huffman Tree.Mission accomplished!If we want to know exactly the code for each character,we assign the edge between father and its left child the value 0,and the other edge value 1.Thus we can get the code by following the path from each leaf to the root.If we want to know the total code length of the whole string,we then make a sum of the code length of each character multiplied by its frequency.OK,problem solved!
Code:
1 #include<iostream>
2 #include<cstring>
3 #include<algorithm>
4 using namespace std;
5 const int MAXSIZE=20001;
6 struct elementype
7 {
8 int value;
9 int lchild,rchild;
10 bool isleaf;
11 int rank;
12 }pool[MAXSIZE];
13 struct HEAP
14 {
15 int n;
16 int elements[MAXSIZE];
17 }heap;
18 void MakeHeap()
19 {
20 heap.n=0;
21 }
22 bool HeapEmpty()
23 {
24 return (!heap.n);
25 }
26 bool HeapFull()
27 {
28 return heap.n==MAXSIZE-1;
29 }
30 int HeapSize()
31 {
32 return heap.n;
33 }
34 void Insert_Heap(int element)
35 {
36 int i;
37 if(!HeapFull())
38 {
39 i=++heap.n;
40 while(i>1 && pool[element].value < pool[heap.elements[i/2]].value)
41 {
42 heap.elements[i]=heap.elements[i/2];
43 i/=2;
44 }
45 }//Insert strategy,put the new element at the bottom and move it up according to its value
46 //compared with its father
47 heap.elements[i]=element;
48 }
49 int Delete_Heap()
50 {
51 int parent=1,child=2;
52 int tmp,result;
53 if(!HeapEmpty())
54 {
55 result=heap.elements[1];//remove the element on top
56 tmp=heap.elements[heap.n--];//put the last element on top temporarily
57 while(child<=heap.n)
58 {
59 if(child<heap.n && pool[heap.elements[child+1]].value<pool[heap.elements[child]].value)
60 {
61 child++;
62 }//compare each node with the larger one of its children
63 if(pool[tmp].value<=pool[heap.elements[child]].value) break;//put the element where it belongs
64 heap.elements[parent]=heap.elements[child];//if the child has a larger value,make it father
65 parent=child;
66 child*=2;
67 }
68 heap.elements[parent]=tmp;
69 }
70 return result;
71 }
72 double s;
73 void Preorder(int root)
74 {
75 if(pool[root].isleaf)
76 {
77 s+=pool[root].value*pool[root].rank;
78 }
79 else
80 {
81 pool[pool[root].lchild].rank=pool[pool[root].rchild].rank=pool[root].rank+1;
82 Preorder(pool[root].lchild);
83 Preorder(pool[root].rchild);
84 }
85 }
86 char op[10001];
87 int main()
88 {
89 int n,k,p1,p2;
90 while(gets(op))
91 {
92 if(strcmp(op,"END")==0) break;
93 MakeHeap();
94 n=(int)strlen(op);
95 sort(op,op+n);
96 char cur=op[0];int counter=0;k=1;
97 for(int i=0;i<=n;i++)
98 {
99 if(cur!=op[i])
100 {
101 cur=op[i];
102 pool[k].value=counter;
103 pool[k].lchild=pool[k].rchild=k;
104 pool[k].isleaf=true;Insert_Heap(k);k++;
105 counter=1;
106 }
107 else counter++;
108 }
109 if(k==2)
110 {
111 printf("%d %d %.1lf\n",8*n,n,8.0);
112 continue;
113 }
114 while(1)
115 {
116 p1=Delete_Heap();p2=Delete_Heap();
117 pool[k].lchild=p1;pool[k].rchild=p2;pool[k].value=pool[p1].value+pool[p2].value;
118 pool[k].isleaf=false;
119 if(HeapEmpty())
120 break;
121 else
122 {
123 Insert_Heap(k);
124 k++;
125 }
126 }
127 pool[k].rank=0,s=0;
128 Preorder(k);
129 printf("%d %.0lf %.1lf\n",8*n,s,(double)(8*n)/s);
130 //getchar();
131 }
132 return 0;
133 }
134
135
Note that,when trying to find the two characters with the lowest frequencies,we use a
Binary Heap here.This task can also be completed by other methods.