拟合散点方程

多点拟合

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
def leastsq(func, x0, args=(), Dfun=None, full_output=0,
col_deriv=0, ftol=1.49012e-8, xtol=1.49012e-8,
gtol=0.0, maxfev=0, epsfcn=None, factor=100, diag=None):
"""
Minimize the sum of squares of a set of equations.
::
x = arg min(sum(func(y)**2,axis=0))
y
Parameters
----------
func : callable
should take at least one (possibly length N vector) argument and
returns M floating point numbers. It must not return NaNs or
fitting might fail.
x0 : ndarray
The starting estimate for the minimization.
args : tuple, optional
Any extra arguments to func are placed in this tuple.
Dfun : callable, optional
A function or method to compute the Jacobian of func with derivatives
across the rows. If this is None, the Jacobian will be estimated.
full_output : bool, optional
non-zero to return all optional outputs.
col_deriv : bool, optional
non-zero to specify that the Jacobian function computes derivatives
down the columns (faster, because there is no transpose operation).
ftol : float, optional
Relative error desired in the sum of squares.
xtol : float, optional
Relative error desired in the approximate solution.
gtol : float, optional
Orthogonality desired between the function vector and the columns of
the Jacobian.
maxfev : int, optional
The maximum number of calls to the function. If `Dfun` is provided
then the default `maxfev` is 100*(N+1) where N is the number of elements
in x0, otherwise the default `maxfev` is 200*(N+1).
epsfcn : float, optional
A variable used in determining a suitable step length for the forward-
difference approximation of the Jacobian (for Dfun=None).
Normally the actual step length will be sqrt(epsfcn)*x
If epsfcn is less than the machine precision, it is assumed that the
relative errors are of the order of the machine precision.
factor : float, optional
A parameter determining the initial step bound
(``factor * || diag * x||``). Should be in interval ``(0.1, 100)``.
diag : sequence, optional
N positive entries that serve as a scale factors for the variables.
Returns
-------
x : ndarray
The solution (or the result of the last iteration for an unsuccessful
call).
cov_x : ndarray
Uses the fjac and ipvt optional outputs to construct an
estimate of the jacobian around the solution. None if a
singular matrix encountered (indicates very flat curvature in
some direction). This matrix must be multiplied by the
residual variance to get the covariance of the
parameter estimates -- see curve_fit.
infodict : dict
a dictionary of optional outputs with the key s:
``nfev``
The number of function calls
``fvec``
The function evaluated at the output
``fjac``
A permutation of the R matrix of a QR
factorization of the final approximate
Jacobian matrix, stored column wise.
Together with ipvt, the covariance of the
estimate can be approximated.
``ipvt``
An integer array of length N which defines
a permutation matrix, p, such that
fjac*p = q*r, where r is upper triangular
with diagonal elements of nonincreasing
magnitude. Column j of p is column ipvt(j)
of the identity matrix.
``qtf``
The vector (transpose(q) * fvec).
mesg : str
A string message giving information about the cause of failure.
ier : int
An integer flag. If it is equal to 1, 2, 3 or 4, the solution was
found. Otherwise, the solution was not found. In either case, the
optional output variable 'mesg' gives more information.
Notes
-----
"leastsq" is a wrapper around MINPACK's lmdif and lmder algorithms.
cov_x is a Jacobian approximation to the Hessian of the least squares
objective function.
This approximation assumes that the objective function is based on the
difference between some observed target data (ydata) and a (non-linear)
function of the parameters `f(xdata, params)` ::
func(params) = ydata - f(xdata, params)
so that the objective function is ::
min sum((ydata - f(xdata, params))**2, axis=0)
params
The solution, `x`, is always a 1D array, regardless of the shape of `x0`,
or whether `x0` is a scalar.
"""
x0 = asarray(x0).flatten()
n = len(x0)
if not isinstance(args, tuple):
args = (args,)
shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
m = shape[0]
if n > m:
raise TypeError('Improper input: N=%s must not exceed M=%s' % (n, m))
if epsfcn is None:
epsfcn = finfo(dtype).eps
if Dfun is None:
if maxfev == 0:
maxfev = 200*(n + 1)
with _MINPACK_LOCK:
retval = _minpack._lmdif(func, x0, args, full_output, ftol, xtol,
gtol, maxfev, epsfcn, factor, diag)
else:
if col_deriv:
_check_func('leastsq', 'Dfun', Dfun, x0, args, n, (n, m))
else:
_check_func('leastsq', 'Dfun', Dfun, x0, args, n, (m, n))
if maxfev == 0:
maxfev = 100 * (n + 1)
with _MINPACK_LOCK:
retval = _minpack._lmder(func, Dfun, x0, args, full_output,
col_deriv, ftol, xtol, gtol, maxfev,
factor, diag)

errors = {0: ["Improper input parameters.", TypeError],
1: ["Both actual and predicted relative reductions "
"in the sum of squares\n are at most %f" % ftol, None],
2: ["The relative error between two consecutive "
"iterates is at most %f" % xtol, None],
3: ["Both actual and predicted relative reductions in "
"the sum of squares\n are at most %f and the "
"relative error between two consecutive "
"iterates is at \n most %f" % (ftol, xtol), None],
4: ["The cosine of the angle between func(x) and any "
"column of the\n Jacobian is at most %f in "
"absolute value" % gtol, None],
5: ["Number of calls to function has reached "
"maxfev = %d." % maxfev, ValueError],
6: ["ftol=%f is too small, no further reduction "
"in the sum of squares\n is possible.""" % ftol,
ValueError],
7: ["xtol=%f is too small, no further improvement in "
"the approximate\n solution is possible." % xtol,
ValueError],
8: ["gtol=%f is too small, func(x) is orthogonal to the "
"columns of\n the Jacobian to machine "
"precision." % gtol, ValueError],
'unknown': ["Unknown error.", TypeError]}

info = retval[-1] # The FORTRAN return value

if info not in [1, 2, 3, 4] and not full_output:
if info in [5, 6, 7, 8]:
warnings.warn(errors[info][0], RuntimeWarning)
else:
try:
raise errors[info][1](errors[info][0])
except KeyError:
raise errors['unknown'][1](errors['unknown'][0])

mesg = errors[info][0]
if full_output:
cov_x = None
if info in [1, 2, 3, 4]:
from numpy.dual import inv
perm = take(eye(n), retval[1]['ipvt'] - 1, 0)
r = triu(transpose(retval[1]['fjac'])[:n, :])
R = dot(r, perm)
try:
cov_x = inv(dot(transpose(R), R))
except (LinAlgError, ValueError):
pass
return (retval[0], cov_x) + retval[1:-1] + (mesg, info)
else:
return (retval[0], info)
以上为大家常用python数据分析库拟合曲线方程的方法。但是这里可以看到计算得到的方程并不是最优解而是牛顿迭代不断逼近的解。(SO:调库要谨慎!)

直线拟合

  • 首先scipy.optimize.leastsq可以直接调用就行。然后我看网上很多雷同的博客用的这个库直接调用的,自己测试后对于此库个人保留怀疑态度。所以下面是自己写的一个对于直线方程的拟合函数
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 这里用的方程是y=k*x+b
# 输入的是列表list,分别是x和y的值
# 输出的是k和b的值
def least_square(x, y):
if len(x) != len(y):
return False
aver_x = sum(x) / len(x)
aver_y = sum(y) / len(y)
# 分母和分子初始化
denominator = 0
molecule = 0
for i in range(0, len(x)):
denominator += (x[i] - aver_x) * (y[i] - aver_y)
molecule += pow((x[i] - aver_x), 2)
k = denominator / molecule
b = aver_y - k * aver_x
return k, b
  • 推导过程

    • 直线方程为$y = kx + b$
    • 先求k为$k = \frac{\sum^n_{i=1}(x_i-\bar{x})(y_i-\bar{y})}{\sum^n_{i=1}(x_i-\bar{x})^2}$
    • 再求b为$b = \bar{y} - k\bar{x}$
  • 推导过程参考wiki

针对椭圆的最小二乘法拟合

  • 平面上任意位置的一个椭圆,其中心坐标为(x0,y0),半长轴a,半短轴b,长轴偏角为θ,方程为$x^2 + Axy + By^2 + Cx + Dy + E = 0$
  • 转换为标准椭圆方程$\frac{(x-x_0)^2}{a^2} + \frac{(y-y_0)^2}{b^2} = 1$时,参数计算为:
    • $x_0 = \frac{2BC-AD}{A^2-4B}$
    • $y_0 = \frac{2D-AD}{A^2-4B}$
    • $a = \sqrt{\frac{2(ACD-BC^2-D^2+4BE-A^2E)}{(A^2-4B)(B+\sqrt{A^2+(1-B)^2}+1)}}$
    • $b = \sqrt{\frac{2(ACD-BC^2-D^2+4BE-A^2E)}{(A^2-4B)(B-\sqrt{A^2+(1-B)^2}+1)}}$
    • $\theta = \arctan(\sqrt{\frac{a^2-b^2B}{a^2B-b^2}})$
  • 在原始测得的N(N≥5)组数据(xi,yi),(i=1,2,3,…,N)中,根据椭圆方程通式和最小二乘法原理,求目标函数$F(A, B, C, D, E) = \sum^N_{i=1}(x_i^2 + Ax_iy_i + By_i^2 + Cx_i + Dy_i + E)^2$的最小值来确定参数A、B、C、D和E的值
  • 令F(A,B,C,D,E)对各个参数的偏导数均为零,得到以下方程组:

$$
\begin{bmatrix}
\sum^N_{i=1}x_i^2y_i^2 & \sum^N_{i=1}x_i^2y_i^3 & \sum^N_{i=1}x_i^2y_i & \sum^N_{i=1}x_iy_i^2 & \sum^N_{i=1}x_iy_i \
\sum^N_{i=1}x_iy_i^3 & \sum^N_{i=1}y_i^4 & \sum^N_{i=1}x_iy_i^2 & \sum^N_{i=1}y_i^3 & \sum^N_{i=1}y_i^2 \
\sum^N_{i=1}x_i^2y_i & \sum^N_{i=1}x_iy_i^2 & \sum^N_{i=1}x_i^2 & \sum^N_{i=1}x_iy_i & \sum^N_{i=1}x_i \
\sum^N_{i=1}x_iy_i^2 & \sum^N_{i=1}y_i^2 & \sum^N_{i=1}x_iy_i & \sum^N_{i=1}y_i^2 & \sum^N_{i=1}y_i \
\sum^N_{i=1}x_iy_i & \sum^N_{i=1}y_i^2 & \sum^N_{i=1}x_i & \sum^N_{i=1}y_i & N \
\end{bmatrix}
\begin{bmatrix}
A \
B \
C \
D \
E \
\end{bmatrix} =
\begin{bmatrix}
\sum^N_{i=1}x_i^3y_i \
\sum^N_{i=1}x_i^2y_i^2 \
\sum^N_{i=1}x_i^3 \
\sum^N_{i=1}x_i^2y_i \
\sum^N_{i=1}x_i^2 \
\end{bmatrix}
$$

  • 求解此线性方程组可解出A、B、C、D和E,代入第二个方程即可解得拟合的椭圆的参数
  • 代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# -*- coding: utf-8 -*-


import numpy as np


def solve_tuoyuan(x,y):
# a*x**2 + b*x*y + c*y**2 + d*x + e*y + f
x0,y0 = x.mean(),y.mean()
D1=np.array([(x-x0)**2,(x-x0)*(y-y0),(y-y0)**2]).T
D2=np.array([x-x0,y-y0,np.ones(y.shape)]).T
S1=np.dot(D1.T,D1)
S2=np.dot(D1.T,D2)
S3=np.dot(D2.T,D2)
T=-1*np.dot(np.linalg.inv(S3),S2.T)
M=S1+np.dot(S2,T)
M=np.array([M[2]/2,-M[1],M[0]/2])
lam,eigen=np.linalg.eig(M)
cond=4*eigen[0]*eigen[2]-eigen[1]**2
A1=eigen[:,cond>0]
A=np.vstack([A1,np.dot(T,A1)]).flatten()
A3=A[3]-2*A[0]*x0-A[1]*y0
A4=A[4]-2*A[2]*y0-A[1]*x0
A5=A[5]+A[0]*x0**2+A[2]*y0**2+A[1]*x0*y0-A[3]*x0-A[4]*y0
A[3]=A3; A[4]=A4; A[5]=A5
return A

def normal_style(paras):
paras=paras/paras[5]
A,B,C,D,E=paras[:5]
# 椭圆中心
x0=(B*E-2*C*D)/(4*A*C-B**2)
y0=(B*D-2*A*E)/(4*A*C-B**2)
# 长短轴
a= 2*np.sqrt((2*A*(x0**2)+2*C*(y0**2)+2*B*x0*y0-2)/(A+C+np.sqrt(((A-C)**2+B**2))))
b= 2*np.sqrt((2*A*(x0**2)+2*C*(y0**2)+2*B*x0*y0-2)/(A+C-np.sqrt(((A-C)**2+B**2))))
# 长轴倾角
q=0.5 * np.arctan(B/(A-C))
# normal_style
return x0,y0,a,b,q

def tuoyuan(y,x,p):
# 用来计算
return p[0]*x**2+p[1]*x*y+p[2]*y**2+p[3]*x + p[4]*y + p[5]
Donate comment here