1 /*
2 特征向量相似度和距离的计算
3
4 相似度:
5 ·夹角余弦
6 ·相关系数
7 ·Dice
8 ·Jaccard
9
10 距离
11 ·明氏距离
12 ·欧氏距离
13 ·曼哈顿距离
14 ·Jeffreys & Matusita 距离
15 ·Mahalanobis 距离,未实现,协方差矩阵
16 ·Canberra 距离(Lance 距离,Williams 距离)
17 */
18
19 #include <iostream>
20 #include <vector>
21 #include <cassert>
22 #include <cmath>
23 using namespace std;
24
// Returns the inner (dot) product of v1 and v2, i.e. the sum of v1[i] * v2[i].
// Precondition (checked with assert): both vectors have the same length.
// Two empty vectors yield 0.0.
double dotProduct(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());

    double sum = 0.0;
    std::vector<double>::const_iterator lhs = v1.begin();
    std::vector<double>::const_iterator rhs = v2.begin();
    // Walk both vectors in lock-step; v1 determines the number of terms.
    while (lhs != v1.end())
    {
        sum += (*lhs) * (*rhs);
        ++lhs;
        ++rhs;
    }
    return sum;
}
35
// Returns the Euclidean norm (length) of v: the square root of the sum of
// the squared components. An empty vector yields 0.0.
double module(const std::vector<double>& v)
{
    double squaredSum = 0.0;
    for (std::vector<double>::const_iterator it = v.begin(); it != v.end(); ++it)
    {
        const double component = *it;
        squaredSum += component * component;
    }
    return std::sqrt(squaredSum);
}
45
46 // 夹角余弦
47 double cosine(const vector<double>& v1, const vector<double>& v2)
48 {
49 assert(v1.size() == v2.size());
50 return dotProduct(v1, v2) / (module(v1) * module(v2));
51 }
52
// Returns the arithmetic mean of the components of v.
// Precondition (checked with assert): v is not empty.
double mean(const std::vector<double>& v)
{
    assert(!v.empty());

    double total = 0.0;
    for (std::vector<double>::const_iterator it = v.begin(); it != v.end(); ++it)
    {
        total += *it;
    }
    return total / v.size();
}
63
64 double cov(const vector<double>& v1, const vector<double>& v2)
65 {
66 assert(v1.size() == v2.size() && v1.size() > 1);
67 double ret = 0.0;
68 double v1a = mean(v1), v2a = mean(v2);
69
70 for (vector<double>::size_type i = 0; i != v1.size(); ++i)
71 {
72 ret += (v1[i] - v1a) * (v2[i] - v2a);
73 }
74
75 return ret / (v1.size() - 1);
76 }
77
78 // 相关系数
79 double coefficient(const vector<double>& v1, const vector<double>& v2)
80 {
81 assert(v1.size() == v2.size());
82 return cov(v1, v2) / sqrt(cov(v1, v1) * cov(v2, v2));
83 }
84
85 // Dice 系数
86 double dice(const vector<double>& v1, const vector<double>& v2)
87 {
88 assert(v1.size() == v2.size());
89 return 2.0 * dotProduct(v1, v2) / (dotProduct(v1, v1) + dotProduct(v2, v2));
90 }
91
// Jaccard (Tanimoto) coefficient for real-valued vectors:
//   (v1 . v2) / ((v1 . v1) + (v2 . v2) - (v1 . v2)).
// Precondition (checked with assert): both vectors have the same length.
// Two all-zero vectors give 0/0, i.e. not a number.
//
// The three inner products are accumulated in a single pass over the data.
double jaccard(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());

    double cross = 0.0;  // v1 . v2
    double self1 = 0.0;  // v1 . v1
    double self2 = 0.0;  // v2 . v2
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        cross += v1[i] * v2[i];
        self1 += v1[i] * v1[i];
        self2 += v2[i] * v2[i];
    }

    // The first term of the denominator must be v1 . v1 (not v1 . v2);
    // otherwise the denominator collapses to v2 . v2.
    return cross / (self1 + self2 - cross);
}
98
// Minkowski distance of order m:
//   (sum over i of |v1[i] - v2[i]|^m)^(1/m).
// m = 1 gives the Manhattan distance, m = 2 the Euclidean distance.
// Preconditions (checked with assert):
//   - both vectors have the same length;
//   - m > 0, because the final exponent is 1.0 / m.
double minkowsky(const std::vector<double>& v1, const std::vector<double>& v2, double m)
{
    assert(v1.size() == v2.size());
    assert(m > 0.0);

    double ret = 0.0;
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        // std::fabs is used explicitly so the floating-point absolute value is
        // selected regardless of which abs overloads are visible.
        ret += std::pow(std::fabs(v1[i] - v2[i]), m);
    }
    return std::pow(ret, 1.0 / m);
}
110
// Euclidean distance: sqrt(sum over i of (v1[i] - v2[i])^2).
// Precondition (checked with assert): both vectors have the same length.
//
// Computed directly with a multiplication and std::sqrt instead of going
// through the general power function with exponents 2 and 1/2.
double euclidean(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());

    double squaredSum = 0.0;
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        const double diff = v1[i] - v2[i];
        squaredSum += diff * diff;
    }
    return std::sqrt(squaredSum);
}
117
// Manhattan (city-block) distance: sum over i of |v1[i] - v2[i]|.
// Precondition (checked with assert): both vectors have the same length.
//
// Computed directly as a sum of absolute differences; raising each term and
// the final sum to the power 1 would not change the value.
double manhattan(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());

    double total = 0.0;
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        total += std::fabs(v1[i] - v2[i]);
    }
    return total;
}
124
// Jeffreys-Matusita distance:
//   sqrt(sum over i of (sqrt(v1[i]) - sqrt(v2[i]))^2).
// Precondition (checked with assert): both vectors have the same length.
// Components are expected to be non-negative; the square root of a negative
// component is not a number, which then propagates to the result.
double jffreysMatusita(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());

    double squaredSum = 0.0;
    for (std::vector<double>::size_type k = 0; k < v1.size(); ++k)
    {
        const double rootDiff = std::sqrt(v1[k]) - std::sqrt(v2[k]);
        squaredSum += rootDiff * rootDiff;
    }
    return std::sqrt(squaredSum);
}
136
// Mahalanobis distance -- NOT IMPLEMENTED.
// Placeholder only: a real implementation needs a covariance matrix, which
// this signature does not receive. The function checks (with assert) that
// both vectors have the same length and then always returns 0.0.
double mahalanobis(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());

    const double placeholder = 0.0;
    return placeholder;
}
143
// Canberra distance (also known as the Lance or Williams distance):
//   sum over i of |v1[i] - v2[i]| / (|v1[i]| + |v2[i]|).
// Precondition (checked with assert): both vectors have the same length.
//
// The denominator uses |x| + |y| so that components of opposite sign cannot
// cancel to zero. A term whose two components are both zero is 0/0 and is
// counted as 0, following the usual convention for this distance.
// For non-negative inputs without paired zeros the result equals the
// |x - y| / |x + y| form.
double camberra(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());

    double ret = 0.0;
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        const double denom = std::fabs(v1[i]) + std::fabs(v2[i]);
        if (denom == 0.0)
        {
            continue;  // both components are zero: the term contributes 0
        }
        ret += std::fabs(v1[i] - v2[i]) / denom;
    }
    return ret;
}
155
156 int main()
157 {
158 double a[] = {1, 2, 3, 4, 5};
159 double b[] = {5, 4, 3, 2, 1};
160 vector<double> v1(a, a + sizeof (a) / sizeof (*a)), v2(b, b + sizeof (b) / sizeof (*b));
161
162 cout << cosine(v1, v2) << endl;
163 cout << coefficient(v1, v2) << endl;
164 cout << dice(v1, v2) << endl;
165 cout << jaccard(v1, v2) << endl;
166
167 cout << minkowsky(v1, v2, 5.0) << endl;
168 cout << euclidean(v1, v2) << endl;
169 cout << manhattan(v1, v2) << endl;
170 cout << jffreysMatusita(v1, v2) << endl;
171 cout << mahalanobis(v1, v2) << endl;
172 cout << camberra(v1, v2) << endl;
173
174 return 0;
175 }
posted on 2012-02-13 15:18
unixfy 阅读(9213)
评论(1) 编辑 收藏 引用