STL中的关联容器一般用平衡树来实现,多数是红黑树,平衡树的单个插入、查找、删除操作时间复杂度都为O(logn),但是为了保证元素之间的相对次 序,每个元素需要三个指针和一个状态位的内存开销,还有,在堆上分配的每块内存都有几个字节的metadata开销。如果用于处理短类型的元素,这实在不划算。况且很多场合我们并不需要频繁插入和删除元素,仅仅是大量检 索操作。
如果仅需要大量查找,且不会频繁的插入和删除(至多批量进行),且需要尽可能的节约内存,则C++ STL中的关联容器用排序数组比用平衡树作为基本数据结构来实现更合适。这种场合多见于嵌入式应用。
废话少说,代码很简单,瞄一眼就知道怎么回事,下面便是排序数组sorted_vector.hpp文件的代码。
1 #ifndef SORTED_VECTOR_HPP_
2 #define SORTED_VECTOR_HPP_
3
4 #include <vector>
5 #include <algorithm>
6 #include <functional>
7
8 namespace Sample // 为了避免重名,用namespace封起来
9 {
10 // very simple and ugly base class for map, multimap, set, multiset
11 // it is designed to save memory as possible as it can, but at the cost
12 // of very slow insertion and erasure operations. if memory is not a
13 // problem, the balanced tree should be used instead of a sorted vector
14 // as the basic data structure
15
16 /*
17 template <typename Key, // the key type for containers map, set and multixxx
18 typename Value, // the real value, for map and multimap it is a pair
19 // for set and multiset it is the same as Key
20 typename KeyOfValue, // object (select1st, identity) to extract key from Value
21 typename Compare> // object (less, greater) to compare two keys
22 class sorted_vector;
23 */
24
25 template <typename Key, typename Value, typename KeyOfValue, typename Compare>
26 class sorted_vector
27 {
28 public:
29 typedef Key key_type;
30 typedef Value value_type;
31 typedef KeyOfValue key_of_value;
32 typedef Compare key_compare;
33 typedef typename std::vector<value_type>::iterator iterator;
34 typedef typename std::vector<value_type>::const_iterator const_iterator;
35 typedef typename std::vector<value_type>::reference reference;
36 typedef typename std::vector<value_type>::const_reference const_reference;
37 typedef typename std::vector<value_type>::size_type size_type;
38 typedef typename std::vector<value_type>::reverse_iterator reverse_iterator;
39 typedef typename std::vector<value_type>::const_reverse_iterator const_reverse_iterator;
40
41 public:
42 sorted_vector() : kv(), cmp(), m_cont() {}
43 explicit sorted_vector(size_type n) : kv(), cmp(), m_cont(n) {}
44
45 sorted_vector(const sorted_vector& s)
46 : kv(s.kv), cmp(s.cmp), m_cont(s.m_cont) {}
47
48 sorted_vector(const key_of_value& kof, const key_compare& kc)
49 : kv(kof), cmp(kc), m_cont() {}
50
51 iterator begin() { return m_cont.begin(); }
52 const_iterator begin() const { return m_cont.begin(); }
53 iterator end() { return m_cont.end(); }
54 const_iterator end() const { return m_cont.end(); }
55 reverse_iterator rbegin() { return m_cont.rbegin(); }
56 const_reverse_iterator rbegin() const { return m_cont.rbegin(); }
57 reverse_iterator rend() { return m_cont.rend(); }
58 const_reverse_iterator rend() const { return m_cont.rend(); }
59
60 bool empty() const { return m_cont.empty(); }
61 size_type size() const { return m_cont.size(); }
62 size_type max_size() const { return m_cont.max_size(); }
63
64 const_reference operator[] (size_type idx) const { return m_cont[idx]; }
65
66 void swap(sorted_vector& t) // exchange two sorted_vector objects
67 {
68 m_cont.swap(t.m_cont);
69 std::swap(kv, t.kv);
70 std::swap(cmp, t.cmp);
71 }
72
73 void reserve(size_type n) { m_cont.reserve(n); } // an extension
74
75 // erased all equal to x and returned the number of erased
76 size_type erase(const key_type& x)
77 {
78 std::pair<iterator, iterator> pit = equal_range(x);
79 m_cont.erase(pit.first, pit.second);
80 return pit.second - pit.first;
81 }
82
83 void erase(iterator pos) { m_cont.erase(pos); }
84
85
86 template <typename Iterator>
87 void erase(Iterator first, Iterator last) { m_cont.erase(first, last); }
88
89 void clear() { m_cont.clear(); }
90
91 iterator find(const key_type& x) { return lower_bound(x); }
92 const_iterator find(const key_type& x) const { return lower_bound(x); }
93
94 size_type count(const key_type& x) const
95 {
96 std::pair<iterator, iterator> pit = equal_range(x);
97 return pit.second - pit.first;
98 }
99
100 iterator lower_bound(const key_type& x)
101 {
102 size_type len = size(), half;
103 iterator first = begin(), middle;
104 while(len > 0)
105 {
106 half = len >> 1;
107 middle = first + half;
108 if(cmp(kv(*middle), x))
109 {
110 first = middle;
111 ++first;
112 len = len - half - 1;
113 }
114 else
115 len = half;
116 }
117 return first;
118 }
119
120
121 const_iterator lower_bound(const key_type& x) const
122 {
123 size_type len = size(), half;
124 const_iterator first = begin(), middle;
125 while(len > 0)
126 {
127 half = len >> 1;
128 middle = first + half;
129 if(cmp(kv(*middle), x))
130 {
131 first = middle;
132 ++first;
133 len = len - half - 1;
134 }
135 else
136 len = half;
137 }
138 return first;
139 }
140
141 iterator upper_bound(const key_type& x)
142 {
143 size_type len = size(), half;
144 iterator first = begin(), middle;
145 while(len > 0)
146 {
147 half = len >> 1;
148 middle = first + half;
149 if(cmp(x, kv(*middle)))
150 len = half;
151 else
152 {
153 first = middle;
154 ++first;
155 len = len - half - 1;
156 }
157 }
158 return first;
159 }
160
161 const_iterator upper_bound(const key_type& x) const
162 {
163 size_type len = size(), half;
164 const_iterator first = begin(), middle;
165 while(len > 0)
166 {
167 half = len >> 1;
168 middle = first + half;
169 if(cmp(x, kv(*middle)))
170 len = half;
171 else
172 {
173 first = middle;
174 ++first;
175 len = len - half - 1;
176 }
177 }
178 return first;
179 }
180
181 std::pair<iterator, iterator> equal_range(const key_type& x)
182 {
183 return make_pair(lower_bound(x), upper_bound(x));
184 }
185
186 std::pair<const_iterator, const_iterator> equal_range(const key_type& x) const
187 {
188 return make_pair(lower_bound(x), upper_bound(x));
189 }
190
191 protected:
192 // insert x into sorted_vector, if there is already has one, return
193 // a pointer pointed to it and state false, otherwise return newly
194 // inserted address and true state
195 std::pair<iterator, bool> insert_unique(const_reference x)
196 {
197 iterator it = upper_bound(kv(x));
198 if(it == begin() || cmp(kv(*(it - 1)), kv(x)))
199 return std::pair<iterator, bool>(m_cont.insert(it, x), true);
200 else
201 return std::pair<iterator, bool>(it, false);
202 }
203
204 iterator insert_unique(iterator pos, const_reference x)
205 {
206 return insert_unique(x).first;
207 }
208
209 template <class InputIterator>
210 void insert_unique(InputIterator first, InputIterator last)
211 {
212 for(; first != last; ++first)
213 insert_unique(*first);
214 }
215
216 iterator insert_equal(const_reference x)
217 {
218 iterator it = upper_bound(kv(x));
219 return m_cont.insert(it, x);
220 }
221
222 iterator insert_equal(iterator pos, const_reference x)
223 {
224 return insert_equal(x);
225 }
226
227 // insert a sequence in the range [first, last) into sorted_vector
228 // if insert a small sequence into sorted vector, just append them
229 // and sorted the whole once
230 template <class InputIterator>
231 void insert_equal(InputIterator first, InputIterator last)
232 {
233 if(std::distance(first, last) < 16)
234 {
235 for(; first != last; ++first)
236 insert_equal(*first);
237 }
238 else
239 {
240 m_cont.insert(end(), first, last);
241 std::stable_sort(begin(), end(), key_cmp());
242 }
243 }
244
245 struct key_cmp // strange syntax required by some compilers
246 {
247 bool operator()(const_reference x, const_reference y) const
248 {
249 return key_compare()(key_of_value()(x), key_of_value()(y));
250 }
251 };
252
253 private:
254 key_of_value kv;
255 key_compare cmp;
256 std::vector<value_type> m_cont;
257 };
258
259 } // namespace Sample
260
261 #endif
仅仅以map和multimap为例,为了节约篇幅,set和multiset的实现忽略掉。map.hpp文件代码如下。
1 #ifndef MAP_MULTIMAP_HPP_
2 #define MAP_MULTIMAP_HPP_
3
4 #include "sorted_vector.hpp"
5 #include <functional>
6
7 namespace Sample // 为了避免重名,用namespace封起来
8 {
9
10 template <typename Pair>
11 struct select1st
12 {
13 const typename Pair::first_type& operator()(const Pair& x) const
14 {
15 return x.first;
16 }
17 };
18
19 template <typename Key, typename Value, typename Compare = std::less<Key> >
20 class map : public sorted_vector<Key, std::pair<Key, Value>, select1st<std::pair<Key, Value> >, Compare> // 用继承不用包含可以节约很多打字
21 {
22 public:
23 typedef Key key_type;
24 typedef Value value_type;
25 typedef Compare key_compare;
26 typedef std::pair<key_type, value_type> map_pair;
27 typedef sorted_vector<Key, map_pair, select1st<map_pair>, key_compare> Rep_type;
28 typedef typename Rep_type::iterator iterator;
29 typedef typename Rep_type::const_iterator const_iterator;
30 typedef typename Rep_type::reference reference;
31 typedef typename Rep_type::const_reference const_reference;
32 typedef typename Rep_type::size_type size_type;
33 typedef typename Rep_type::reverse_iterator reverse_iterator;
34 typedef typename Rep_type::const_reverse_iterator const_reverse_iterator;
35 public:
36 map() : Rep_type() {}
37 explicit map(size_type n) : Rep_type(n) {}
38 map(const map& s) : Rep_type(s) {}
39
40 map(iterator first, iterator last) : Rep_type()
41 {
42 insert_unique(first, last);
43 }
44
45 map(iterator first, iterator last, const key_compare& kc) : Rep_type(kc)
46 {
47 insert_unique(first, last);
48 }
49
50 map(const key_compare& kc) : Rep_type(kc) {}
51
52 std::pair<iterator, bool> insert(const_reference x)
53 {
54 return insert_unique(x);
55 }
56
57 iterator insert(iterator pos, const_reference x)
58 {
59 return insert(x).first;
60 }
61
62 template <class InputIterator>
63 void insert(InputIterator first, InputIterator last)
64 {
65 insert_unique(first, last);
66 }
67 };
68
69
70 template <typename Key, typename Value, typename Compare = std::less<Key> >
71 class multimap : public sorted_vector<Key, std::pair<Key, Value>, select1st<std::pair<Key, Value> >, Compare>
72 {
73 public:
74 typedef Key key_type;
75 typedef Value value_type;
76 typedef Compare key_compare;
77 typedef std::pair<key_type, value_type> map_pair;
78 typedef sorted_vector<key_type, map_pair, select1st<map_pair>, key_compare> Rep_type;
79 typedef typename Rep_type::iterator iterator;
80 typedef typename Rep_type::const_iterator const_iterator;
81 typedef typename Rep_type::reference reference;
82 typedef typename Rep_type::const_reference const_reference;
83 typedef typename Rep_type::size_type size_type;
84 typedef typename Rep_type::reverse_iterator reverse_iterator;
85 typedef typename Rep_type::const_reverse_iterator const_reverse_iterator;
86 public:
87 multimap() : Rep_type() {}
88 explicit multimap(size_type n) : Rep_type(n) {}
89 multimap(const multimap& s) : Rep_type(s) {}
90
91 multimap(iterator first, iterator last) : Rep_type()
92 { insert_equal(first, last); }
93
94 multimap(iterator first, iterator last, const key_compare& kc)
95 : Rep_type(kc) { insert_equal(first, last); }
96
97 multimap(const key_compare& kc) : Rep_type(kc) {}
98
99 iterator insert(const_reference x) { return insert_equal(x); }
100
101 iterator insert(iterator pos, const_reference x)
102 {
103 return insert(x).first;
104 }
105
106 template <class InputIterator>
107 void insert(InputIterator first, InputIterator last)
108 {
109 insert_equal(first, last);
110 }
111 };
112
113 } // namespace Sample
114
115 #endif
小测一下:
1 #include <iostream>
2 #include <cstdlib>
3 #include "map.hpp"
4 #include <vector>
5 #include <ctime>
6
7 int main()
8 {
9 typedef unsigned long UInt32;
10 const UInt32 SZ = 10000, LOOPS = 10;
11
12 std::vector<std::pair<UInt32, UInt32> > v;
13 v.reserve(SZ);
14 std::srand(std::time(0));
15 for(UInt32 i = 0; i < SZ; ++i)
16 v.push_back(std::make_pair(std::rand() * RAND_MAX + std::rand(), std::rand() * RAND_MAX + std::rand()));
17
18 double ie = 0, fe = 0, ee = 0;
19 std::cout << "testing performance\n";
20 for(UInt32 k = 0; k < LOOPS; ++k)
21 {
22 Sample::map<UInt32, UInt32> sv;
23 std::clock_t t = std::clock();
24 for(UInt32 i = 0; i < SZ; ++i)
25 sv.insert(v[i]);
26 ie += std::clock() - t;
27
28 // for(Sample::map<UInt32, UInt32>::const_iterator it = sv.begin(); it != sv.end(); ++it)
29 // std::cout << "[" << (*it).first << ", " << it->second << "]\n";
30
31 t = std::clock();
32 for(UInt32 i = 0; i < SZ; ++i)
33 sv.find(v[i].first);
34 fe += std::clock() - t;
35
36 t = std::clock();
37 for(UInt32 i = 0; i < SZ; ++i)
38 sv.erase(v[i].first);
39 ee += std::clock() - t;
40 }
41
42 std::cout << SZ << " entries tested, result is as the following:\n";
43
44 std::cout << "insertion: " << ie/LOOPS << "ms, retrieval: "
45 << fe/LOOPS << "ms, erasure: " << ee/LOOPS << "ms\n";
46
47 std::cout << "showing correctness \n";
48
49 Sample::multimap<UInt32, UInt32> msv;
50 msv.insert(v.begin(), v.begin() + (10 < v.size()? 10 : v.size()));
51 for(UInt32 i = 0; i < msv.size(); ++i)
52 std::cout << "[" << msv[i].first << ", " << msv[i].second << "]\n";
53 std::system("PAUSE");
54 return 0;
55 }
就这样了,看上去是不是很简单啊?下面是map的测试结果,计时器不用clock()函数,因为它精度太低,而是使用了一种可以精确到微妙的计时器(在此不作介绍),Win32系统,CPU Intel Q9500。
编译器g++4.5.2 (-O3)
数据量 | 插入(ms) | 查找(ms) | 删除(ms) | 内存占用(kb) |
| std:: | | std:: | | std:: | | std:: |
1e1 | 0.001316 | 0.001449 | 0.000487 | 0.000529 | 0.000582 | 0.001390 | 4 | 4 |
1e2 | 0.009308 | 0.014452 | 0.004041 | 0.004051 | 0.007470 | 0.013608 | 4 | 4 |
1e3 | 0.285278 | 0.212497 | 0.061661 | 0.055454 | 0.250626 | 0.201471 | 8 | 32 |
1e4 | 25.59130 | 3.317090 | 0.851534 | 0.960882 | 25.8821 | 2.923110 | 128 | 316 |
1e5 | 2931.850 | 41.21830 | 11.9850 | 18.69320 | 3065.81 | 36.45560 | 1024 | 3128 |
1e6 | --- | 847.5230 | 305.791 | 631.4230 | --- | 924.3610 | 8192 | 31256 |
上表中的std::表示标准的map, ---表示需要的时间非常久。
编译器VC++8.0 (/Ox/Ob2/Ot)
数据量 | 插入(ms) | 查找(ms) | 删除(ms) | 内存占用(kb) |
| std:: | | std:: | | std:: | | std:: |
1e1 | 0.001211 | 0.001713 | 0.000545 | 0.000532 | 0.000646 | 0.001965 | 4 | 4 |
1e2 | 0.014541 | 0.014452 | 0.004593 | 0.004597 | 0.009673 | 0.021276 | 4 | 4 |
1e3 | 0.344706 | 0.213861 | 0.061201 | 0.061096 | 0.301976 | 0.290216 | 8 | 32 |
1e4 | 29.98820 | 3.046300 | 0.840954 | 1.055350 | 26.11650 | 3.929450 | 128 | 316 |
1e5 | 3140.03 | 40.79320 | 11.3143 | 18.00870 | 3060.880 | 51.18610 | 1024 | 3128 |
1e6 | --- | 865.2950 | 304.088 | 671.109 | --- | 1197.280 | 8192 | 31256 |
从测试结果比较一下便知,存储短类型元素,排序数组往往比平衡树更节约内存,如果并非频繁的插入和删除(可以批量进行),而仅仅是大量检索的话,设计关联容器时,用排序数组很可能优于用平衡树。