最小二乘法简介。
理论1
线性回归是最常见的回归方法之一,可以用来预测或者分类,主要解决线性问题。线性回归的核心问题是:如何根据样本求出最佳的拟合直线。最常用的方法是最小二乘法,它是一种数学优化技术,通过最小化误差的平方和来寻找与数据最匹配的函数。
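简单推导:
设拟合直线为 $y = ax + b$,样本为 $(x_i, y_i),\ i = 1, \dots, n$,误差平方和为
$$S(a, b) = \sum_{i=1}^{n} (y_i - a x_i - b)^2 .$$
令 $\partial S / \partial a = 0$、$\partial S / \partial b = 0$,解这个二元一次方程组得到
$$a = \frac{n \sum x_i y_i - \sum x_i \sum y_i}{n \sum x_i^2 - \left(\sum x_i\right)^2}, \qquad b = \frac{\sum x_i^2 \sum y_i - \sum x_i \sum x_i y_i}{n \sum x_i^2 - \left(\sum x_i\right)^2} .$$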
代码1
import numpy as np
import matplotlib.pyplot as plt
def initData():
    """Return the sample observations as a pair of lists ``(xi, yi)``.

    ``xi`` holds the integers 1 through 10; ``yi`` holds the measured
    value paired with each x.
    """
    xs = list(range(1, 11))
    ys = [10, 11.5, 12, 13, 14.5, 15.5, 16.8, 17.3, 18, 18.7]
    return xs, ys
def solution(x, y):
    """Fit the line ``y = a * x + b`` to sample points by least squares.

    Uses the closed-form solution of the normal equations, built from the
    running sums of x, y, x*x and x*y over all points.

    Args:
        x: Sequence of x coordinates.
        y: Sequence of y coordinates; must have the same length as ``x``.

    Returns:
        Tuple ``(a, b)``: the slope and intercept of the fitted line.

    Raises:
        ValueError: If ``x`` and ``y`` have different lengths.
        ZeroDivisionError: If the fit is undetermined because the
            denominator ``n*sum(x^2) - sum(x)^2`` is zero (for example no
            points at all, a single point, or all x values equal).
    """
    n = len(x)
    # Reject mismatched inputs up front: pairing by index would otherwise
    # either fail midway or silently ignore the trailing y values.
    if len(y) != n:
        raise ValueError(
            "x and y must have the same length (got %d and %d)" % (n, len(y))
        )

    sum_x, sum_y, sum_xy, sum_xx = 0, 0, 0, 0
    for x_val, y_val in zip(x, y):
        sum_x += x_val
        sum_y += y_val
        sum_xx += x_val * x_val
        sum_xy += x_val * y_val

    # Both coefficients share this denominator; compute it once.
    denom = n * sum_xx - sum_x * sum_x
    if denom == 0:
        # Same exception type the bare division would raise, but with a
        # message that explains the cause.
        raise ZeroDivisionError(
            "least-squares line is undetermined: "
            "need at least two points with distinct x values"
        )

    a = (n * sum_xy - sum_x * sum_y) / denom
    b = (sum_xx * sum_y - sum_x * sum_xy) / denom
    return a, b
def draw(x, y, xi, yi):
    """Show the fitted line together with the raw sample points.

    Args:
        x: x coordinates of the fitted line.
        y: y coordinates of the fitted line.
        xi: x coordinates of the sample points.
        yi: y coordinates of the sample points.
    """
    # Fitted line first, then the samples as a scatter overlay.
    plt.plot(x, y)
    plt.scatter(xi, yi)
    # Display the figure.
    plt.show()
if __name__ == "__main__":
    # Load the samples and fit a line with the closed-form formulas.
    xi, yi = initData()
    a, b = solution(xi, yi)

    # Evaluate the fitted line on an evenly spaced grid over [0, 10]
    # (np.linspace's default sample count) and plot it with the samples.
    x = np.linspace(0, 10)
    y = a * x + b
    draw(x, y, xi, yi)
结果1
理论2
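矩阵推导:
把每个样本写成一行 $[x_i,\ 1]$,得到矩阵 $A \in \mathbb{R}^{n \times 2}$;记参数向量 $\theta = (a, b)^T$,观测向量 $y = (y_1, \dots, y_n)^T$,则误差平方和为
$$S(\theta) = \lVert A\theta - y \rVert^2 .$$
令梯度 $\nabla_\theta S = 2A^T(A\theta - y) = 0$,得到正规方程 $A^T A\,\theta = A^T y$;当 $A^T A$ 可逆时,
$$\theta = (A^T A)^{-1} A^T y .$$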
代码2
import numpy as np
import matplotlib.pyplot as plt
def initData():
    """Return the sample observations as a pair of lists ``(xi, yi)``.

    ``xi`` holds the integers 1 through 10; ``yi`` holds the measured
    value paired with each x.
    """
    sample_x = list(range(1, 11))
    sample_y = [10, 11.5, 12, 13, 14.5, 15.5, 16.8, 17.3, 18, 18.7]
    return sample_x, sample_y
def draw(x, y, a, b):
    """Show the sample points and the fitted line ``a * x + b``.

    Args:
        x: x coordinates of the samples. Must support ``a * x + b`` as
            element-wise arithmetic; the caller in this script passes a
            NumPy array.
        y: y coordinates of the samples.
        a: Slope of the fitted line.
        b: Intercept of the fitted line.
    """
    plt.plot(x, y, 'o', label='data', markersize=10)
    plt.plot(x, a * x + b, 'r', label='line')
    # The labels above are only rendered once a legend is requested.
    plt.legend()
    plt.show()
if __name__ == "__main__":
    x, y = initData()
    # Design matrix with one row [x_i, 1] per sample, so A @ [a, b] ~= y.
    A = np.vstack([x, np.ones(len(x))]).T
    # Pass rcond explicitly: omitting it triggers a FutureWarning on
    # NumPy 1.14-1.x, and None selects the machine-precision cutoff that
    # newer NumPy versions use by default.
    a, b = np.linalg.lstsq(A, y, rcond=None)[0]
    print("y = %10.5fx + %10.5f" % (a, b))
    # draw() evaluates a * x + b element-wise, which needs arrays, not lists.
    x = np.array(x)
    y = np.array(y)
    draw(x, y, a, b)