1、公式法
通过公式法可以直接求出线性回归的最优解
# Closed-form (normal equation) solution for linear regression:
#   theta = (X_b^T X_b)^(-1) X_b^T y
import numpy as np
import matplotlib.pyplot as plt

# Synthetic data: y = 12 + 5*x + Gaussian noise, 100 samples.
X = 2 * np.random.rand(100, 1)  # 100x1 feature matrix, values in [0, 2)
y = 12 + 5 * X + np.random.randn(100, 1)

plt.plot(X, y, 'b.')
plt.xlabel("X_1")
plt.ylabel("y")
plt.axis([0, 2, 10, 20])
plt.show()

number_data = len(X)
# Fix: use number_data for the bias column instead of the hard-coded 100,
# so the design matrix always matches the actual sample count.
X_b = np.c_[np.ones((number_data, 1)), X]
# Normal equation. (np.linalg.lstsq or pinv would be numerically safer,
# but inv matches the didactic formula.)
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
print(theta_best)
2、梯度下降法
2.1批量梯度下降法(Batch Gradient Descent)
批量梯度下降法是最原始的形式,它是指在每一次迭代时使用所有样本来进行梯度的更新。
# Batch gradient descent: every update uses ALL samples to compute the
# gradient of the mean-squared-error loss.
import numpy as np
import matplotlib.pyplot as plt

# Synthetic data: y = 12 + 5*x + Gaussian noise, 100 samples.
X = 2 * np.random.rand(100, 1)  # 100x1 feature matrix
y = 12 + 5 * X + np.random.randn(100, 1)

plt.plot(X, y, 'b.')
plt.xlabel("X_1")
plt.ylabel("y")
plt.axis([0, 2, 10, 20])
plt.show()

eta = 0.01           # learning rate
n_iterations = 100   # number of full-batch updates
number_data = len(X)
X_b = np.c_[np.ones((number_data, 1)), X]  # prepend a bias column of ones

# Randomly initialize the model parameters (bias, slope).
theta = np.random.randn(2, 1)
cost_history = []
for iteration in range(n_iterations):
    residual = y - X_b.dot(theta)
    # Loss J = (1/m) * ||r||^2, consistent with the 2/m gradient below.
    # (The original recorded 1/(2m)*||r||^2, which is the loss whose
    # gradient is (1/m)X^T r — inconsistent with the update actually used.)
    cost = residual.T.dot(residual)[0][0] / number_data
    cost_history.append(cost)
    # Gradient of J: (2/m) * X^T (X theta - y).
    gradient = 2 / number_data * X_b.T.dot(X_b.dot(theta) - y)
    # Parameter update.
    theta = theta - eta * gradient

plt.plot(range(len(cost_history)), cost_history)
plt.show()
2.2随机梯度下降法(Stochastic Gradient Descent,SGD)
# Stochastic gradient descent: each update uses ONE randomly drawn sample.
import numpy as np
import matplotlib.pyplot as plt

# Synthetic data: y = 12 + 5*x + Gaussian noise, 100 samples.
X = 2 * np.random.rand(100, 1)  # 100x1 feature matrix
y = 12 + 5 * X + np.random.randn(100, 1)

plt.plot(X, y, 'b.')
plt.xlabel("X_1")
plt.ylabel("y")
plt.axis([0, 2, 10, 20])
plt.show()

number_data = len(X)
X_b = np.c_[np.ones((number_data, 1)), X]  # prepend a bias column of ones
theta = np.random.randn(2, 1)              # random initial parameters


def learning_schedule(t):
    """Return the learning rate at step t: large at first, then decaying.

    :param t: global step counter (grows with every parameter update)
    :return: learning rate t0 / (t1 + t)
    """
    t0 = 5
    t1 = 50
    return t0 / (t1 + t)


n_epochs = 50        # number of passes over the data
m = len(X_b)         # number of samples
theta_path_sgd = []  # history of theta updates
cost_path_sgd = []   # history of the loss value

for epoch in range(n_epochs):
    for i in range(m):
        # Draw one sample at random (with replacement).
        random_index = np.random.randint(m)
        xi = X_b[random_index:random_index + 1]
        yi = y[random_index:random_index + 1]
        gradient = 2 * xi.T.dot(xi.dot(theta) - yi)
        # Fix: the step counter is epoch*m + i, not n_epochs*m + i.
        # The original passed the constant n_epochs, so the schedule never
        # advanced across epochs and the learning rate did not decay.
        eta = learning_schedule(epoch * m + i)
        theta = theta - eta * gradient
        theta_path_sgd.append(theta)
        # Record the full-data squared-error loss for monitoring.
        cost = (y - X_b.dot(theta)).T.dot((y - X_b.dot(theta)))[0][0]
        cost_path_sgd.append(cost)

plt.plot(range(len(cost_path_sgd)), cost_path_sgd)
plt.show()
2.3 小批量梯度下降(Mini-Batch Gradient Descent, MBGD)
# Mini-batch gradient descent: each update uses a small random batch.
import numpy as np
import matplotlib.pyplot as plt

# Synthetic data: y = 12 + 5*x + Gaussian noise, 100 samples.
X = 2 * np.random.rand(100, 1)  # 100x1 feature matrix
y = 12 + 5 * X + np.random.randn(100, 1)

plt.plot(X, y, 'b.')
plt.xlabel("X_1")
plt.ylabel("y")
plt.axis([0, 2, 10, 20])
plt.show()

# eta = 0.01
# n_iterations = 100
number_data = len(X)
X_b = np.c_[np.ones((number_data, 1)), X]  # prepend a bias column of ones


def learning_schedule(t):
    """Return the learning rate at step t: starts large, decays over time.

    :param t: global step counter
    :return: learning rate t0 / (t1 + t)
    """
    t0, t1 = 5, 50
    return t0 / (t1 + t)


n_epochs = 50                  # number of passes over the data
minibatch = 16                 # batch size
theta = np.random.randn(2, 1)  # random initial parameters
t = 0                          # global step counter for the schedule
m = len(X_b)
cost_path_mgd = []             # history of the loss value
theta_path_mgd = []            # history of theta updates

for epoch in range(n_epochs):
    # Reshuffle the data once per epoch, then walk it in batches.
    shuffled_indices = np.random.permutation(m)
    X_b_shuffled = X_b[shuffled_indices]
    y_shuffled = y[shuffled_indices]
    for i in range(0, m, minibatch):
        t += 1
        xi = X_b_shuffled[i:i + minibatch]
        yi = y_shuffled[i:i + minibatch]
        # Gradient averaged over the batch: (2/b) * x^T (x theta - y).
        gradients = 2 / minibatch * xi.T.dot(xi.dot(theta) - yi)
        theta = theta - learning_schedule(t) * gradients
        theta_path_mgd.append(theta)
        # Record the full-data squared-error loss for monitoring.
        residual = y - X_b.dot(theta)
        cost_path_mgd.append(residual.T.dot(residual)[0][0])

plt.plot(range(len(cost_path_mgd)), cost_path_mgd)
plt.show()
