考虑用有M个未定参数a
j(j=1,...,M)的模型来拟合N个数据点(x
i, y
i),i= 1, 2, ..., N。
因变量与自变量的一个函数关系可以如下给出:
y(x) = y( x; a
1, a
2, ..., a
M)
如果所给的N个数据点(x
i, y
i)误差都是独立的,并且服从具有相同常数方差的正态分布,那么最小二乘方拟合就是拟合参数a
j的最大似然估计
X
2 = \sum_{i=1}^N [ y
i - y(x
i; a
1, a
2, ..., a
M )]
2如果这个模型是x的任意M个特定函数的线性组合,如
y(x) = a
0 + a
1 cos(k
1x+2) + a
2 sin(k
2x+3)
一般形式可以写为
y(x) = \sum_{k=1}^N a
k X
k(x)
那么它的优值函数为
X
2 = \sum_{i=1}^N [y
i - \sum_{k=1}^M a
k X
k(x
i)]
2将上式分别对M个参数a
k求偏导数,并令其等于0, 可以得到X
2的最小值。
于是得到M个方程
\sum_{i=1}^N [y
i-\sum_{j=1}^M a
j X
j(x
i)]X
k(x
i) = 0 k= 1, 2, ..., M
用克莱姆法则求解这个M元一次方程组,可以得到 a
1, a
2, ...,
a
M的解,即为所求的a
j的最大似然估计。
接下来估计数据与模型的契合度
由平移伽玛近似计算
ssq = gammq((N-2)/2, X
2/2)
以下为c++实现
regression.h
1 template<size_t SIZE, size_t N>
2 class Regression
3 {
4 public:
5 //ctor
6 Regression()
7 {
8 Y.resize ( SIZE );
9 X.resize ( SIZE * N );
10 Result.resize ( SIZE );
11 SSQ = 0.0L;
12 }
13
14 //dtor
15 ~Regression() {}
16 //get Y[SIZE]
17 void _y ( const std :: vector<long double>& y )
18 {
19 assert ( SIZE == y.size() );
20 Y = y;
21 }
22
23 //set vector x[][]
24 void _x ( const std :: vector<long double>& x )
25 {
26 assert ( SIZE * N == x.size() );
27 X = x;
28 }
29
30 //set vector x[][] cloumn index
31 void _x ( const std :: vector<long double>& x, const unsigned int& index )
32 {
33 assert ( index < N );
34 assert ( SIZE == x.size() );
35
36 for ( unsigned int i = 0; i < SIZE; ++i )
37 {
38 X[ i * N + index ] = x[i];
39 }
40 }
41
42 //execute least square fit
43 void fit()
44 {
45 //the model is
46 // alfa[N][N] * result[N] = beta[N]
47 //
48 long double alfa[N][N];
49 long double beta[N];
50
51 for ( unsigned int k = 0; k < N; ++k )
52 {
53 for ( unsigned int j = 0; j < N; ++j )
54 {
55 long double tmp = 0.0L;
56 for ( unsigned int i = 0; i < SIZE; ++i )
57 {
58 tmp += X[i*N+k] * X[i*N+j];
59 }//end of i loop
60 alfa[k][j] = tmp;
61 }//end of j loop
62
63 long double tmp = 0.0L;
64 for ( unsigned int i = 0; i < SIZE; ++i )
65 {
66 tmp += Y[i] * X[i*N+k];
67 }//end of i loop
68 beta[k] = tmp;
69 }//end of k loop
70
71 std::vector<long double> a;
72 a.clear();
73 for ( unsigned int i = 0; i < N; ++i )
74 for ( unsigned int j = 0; j < N; ++j )
75 {
76 a.push_back ( alfa[i][j] );
77 }
78
79 std::vector<long double> b;
80 b.clear();
81 for ( unsigned int i = 0; i < N; ++i )
82 b.push_back ( beta[i] );
83
84 //calculate Result[N]
85 lq<long double> ( a, b, N, Result );
86
87 //calculate chi-square
88 long double chi = 0.0L;
89 for ( unsigned int i = 0; i < SIZE; ++i )
90 {
91 long double tmp = 0.0L;
92 for ( unsigned int j = 0; j < N; ++j )
93 {
94 tmp += Result[j] * X[i*j+j];
95 }
96 tmp -= Y[i];
97 chi += tmp * tmp;
98 }
99
100 SSQ = gammq ( static_cast<long double> ( SIZE - 2 ) / 2.0L, chi / 2.0L );
101
102
103 }//end of k loop
104
105 std :: vector< long double> result() const
106 {
107 return Result;
108 }
109
110 long double ssq() const
111 {
112 return SSQ;
113 }
114
115 private:
116 std :: vector<long double> Y; // -- Y[SIZE]
117 std :: vector<long double> X; // -- X[N][SIZE]
118 std :: vector<long double> Result; // -- Result[N]
119 long double SSQ; //fitness of the model
120 };
121
122
其中SIZE即为所要拟合的a
j的数量M,而N为原始数据点数(x
i, y
i)的数量N。
给出gamma函数计算代码
#include <cmath>
#include <cstdlib>
static long double
gammln ( const long double &xx )
{
long double x,
tmp,
ser;
static long double cof[6] = { 76.18009173, -86.50532033, 24.01409822,
-1.231739516, 0.120858003e-2, -0.536382e-5
};
int j;
x = xx - 1.0;
tmp = x + 5.5;
tmp -= ( x + 0.5 ) * log ( tmp );
ser = 1.0;
for ( j = 0; j <= 5; j++ )
{
x += 1.0;
ser += cof[j] / x;
}
return -tmp + log ( 2.50662827465 * ser );
}
static void
gcf ( long double *gammcf,
const long double &a, const long double &x, long double *gln )
{
const unsigned int ITMAX = 100;
const long double EPS = 3.0e-7;
unsigned int n = 0;
long double gold = 0.0,
g,
fac = 1.0,
b1 = 1.0;
long double b0 = 0.0,
anf,
ana,
an,
a1,
a0 = 1.0;
*gln = gammln ( a );
a1 = x;
for ( n = 1; n <= ITMAX; n++ )
{
an = ( float ) n;
ana = an - a;
a0 = ( a1 + a0 * ana ) * fac;
b0 = ( b1 + b0 * ana ) * fac;
anf = an * fac;
a1 = x * a0 + anf * a1;
b1 = x * b0 + anf * b1;
if ( a1 )
{
fac = 1.0 / a1;
g = b1 * fac;
if ( fabs ( ( g - gold ) / g ) < EPS )
{
*gammcf = exp ( -x + a * log ( x ) - ( *gln ) ) * g;
return;
}
gold = g;
}
}
if ( "a too large, ITMAX too small in routine GCF" )
exit ( EXIT_FAILURE );
}
static void
gser ( long double *gamser,
const long double &a, const long double &x, long double *gln )
{
const unsigned int ITMAX = 100;
const long double EPS = 3.0e-7;
unsigned int n = 0;
long double sum,
del,
ap;
*gln = gammln ( a );
if ( x <= 0.0 )
{
if ( x < 0.0 )
if ( "x less than 0 in routine GSER" )
exit ( EXIT_FAILURE );
*gamser = 0.0;
return;
}
else
{
ap = a;
del = sum = 1.0 / a;
for ( n = 1; n <= ITMAX; n++ )
{
ap += 1.0;
del *= x / ap;
sum += del;
if ( fabs ( del ) < fabs ( sum ) * EPS )
{
*gamser = sum * exp ( -x + a * log ( x ) - ( *gln ) );
return;
}
}
if ( "a too large, ITMAX too small in routine GSER" )
exit ( EXIT_FAILURE );
return;
}
}
long double
gammq ( const long double &a, const long double &x )
{
long double gamser,
gammcf,
gln;
if ( x < 0.0 || a <= 0.0 )
if ( "Invalid arguments in routine gammq." )
exit ( EXIT_FAILURE );
if ( x < ( a + 1.0 ) )
{
gser ( &gamser, a, x, &gln );
return 1.0 - gamser;
}
else
{
gcf ( &gammcf, a, x, &gln );
return gammcf;
}
}
利用克莱姆法则求解线性方程组
lq.h
1 #include "det.h"
2
3 #include <vector>
4 #include <cassert>
5 #include <cstdlib>
6
7 //
8 //for solution of the linear equation
9 // A X = B
10 //
11 template<class T>
12 void lq ( const std :: vector<T>& a, //store A[size][size]
13 const std :: vector<T>& b, //store B[size]
14 const unsigned int& order, //store size
15 std :: vector<T>& result //store X[size]
16 )
17 {
18 unsigned int Order = order;
19 //if order is set to default, calculate
20 if ( 0 == Order )
21 {
22 Order = b.size();
23 }
24
25 assert ( Order * Order == a.size() );
26 assert ( Order == b.size() );
27
28 result.clear();
29
30 const T _D = det ( a, Order ); //store D
31
32 if ( 0 == _D )
33 {
34 if ( "Failed to solve the linear equation" )
35 exit ( EXIT_FAILURE );
36 }
37
38 for ( unsigned int i = 0; i < Order; ++i )
39 {
40 std :: vector<T> A = a;
41 for ( unsigned int j = 0; j < Order; ++j )
42 {
43 A[i+j*Order] = b[j];
44 }
45 const T D = det ( A, Order ); //store D[i]
46
47 result.push_back ( D / _D ); //get X[i]
48 }
49 }
50
在lq.h中有提到一个det函数,这个是用来计算矩阵的行列式值的,因为克莱姆法则就是几个行列式的值之间除来除去罢了,代码如下
det.h
1 //return the determinant of a square matrix arr whose size is order by order
2 template< class T >
3 T det ( const std :: vector<T>& arr, const unsigned int& order = 0 )
4 {
5 unsigned int Order = order;
6
7 //if order is set to default, calculate it
8 if ( 0 == Order )
9 {
10 Order = sqrt ( arr.size() );
11 }
12
13 assert ( Order * Order == arr.size() );
14
15
16 if ( 1 == Order )
17 return ( arr[0] ) ;
18
19 T sum = 0;
20 std ::vector<T> arr2 ;
21 int sign = 1;
22
23 for ( unsigned int i = 0 ; i < Order ; ++i, sign *= -1 )
24 {
25 /* copy n-1 by n-1 array into another array */
26 const unsigned int newsize = ( Order - 1 ) * ( Order - 1 ) ;
27
28 arr2.resize ( newsize );
29 for ( unsigned int j = 1 ; j < Order ; ++j )
30 {
31 for ( unsigned int k = 0, count = 0 ; k < Order ; ++k )
32 {
33 if ( k == i )
34 continue ;//jump out of k loop
35
36 const unsigned int pos = j * Order + k ;
37 const unsigned int newpos = ( j - 1 ) *
38 ( Order - 1 ) + count ;
39 arr2[newpos] = arr[pos] ;
40 count++ ;
41 }//end of k loop
42 }//end of j loop
43
44 /* find determinant value of n-1 by n-1 array and add it to sum */
45 sum += arr[i] * sign * det ( arr2, Order - 1 ) ;
46 }
47 return sum;
48 }
49
下边给出一个示例
要拟合的参数方程为
a
1 x
1 + a
2 x
2 + a
3 x
3 + a
4 x
4 + a
5 x
5 = y
给出5组数据进行拟合,分别是(1 0 0 0 0 1)(0 1 0 0 0 2)(0 0 1 0 0 3)(0 0 0 1 0 4)(0 0 0 0 1 5)
1 #include "regression.h"
2 #include <iostream>
3 #include <vector>
4
5 using namespace std;
6
7 int main()
8 {
9 const unsigned int N = 5;
10
11 Regression<N, N> r;
12 vector<long double> y;
13 vector<long double> x;
14 vector<long double> result;
15
16
17 for ( unsigned int i = 0; i < N; ++i )
18 y.push_back( 1.0L + i );
19 r._y( y );
20
21 for ( unsigned int i = 0; i < N; ++i )
22 {
23 x.clear();
24 for ( unsigned int j = 0; j < N; ++j )
25 {
26 if ( i == j )
27 x.push_back( 1.0L );
28 else
29 x.push_back( 0.0L );
30 }
31 r._x( x, i );
32 }
33
34 r.fit();
35
36 result = r.result();
37
38 for ( unsigned int i = 0; i < N; ++i )
39 cout << result[i] << endl;
40
41
42 return 0;
43 }
posted on 2008-04-15 22:38
Wang Feng 阅读(4892)
评论(2) 编辑 收藏 引用 所属分类:
Numerical C++