1、公式法
通过公式法可以直接求出线性回归的最优解
# Closed-form (normal equation) solution for linear regression:
#   theta = (X_b^T X_b)^(-1) X_b^T y
import numpy as np
import matplotlib.pyplot as plt

# Synthetic data: y = 12 + 5*x + Gaussian noise, 100 samples.
X = 2 * np.random.rand(100, 1)  # 100x1 feature matrix, values in [0, 2)
y = 12 + 5 * X + np.random.randn(100, 1)

plt.plot(X, y, 'b.')
plt.xlabel("X_1")
plt.ylabel("y")
plt.axis([0, 2, 10, 20])
plt.show()

number_data = len(X)
# Fix: use number_data for the bias column instead of the hard-coded 100,
# so the design matrix always matches the actual sample count.
X_b = np.c_[np.ones((number_data, 1)), X]
# Normal equation. (np.linalg.lstsq or pinv would be numerically safer,
# but inv matches the didactic formula.)
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
print(theta_best)
2、梯度下降法
2.1批量梯度下降法(Batch Gradient Descent)
批量梯度下降法是最原始的形式,它是指在每一次迭代时使用所有样本来进行梯度的更新。
# Batch gradient descent: every update uses ALL samples to compute the
# gradient of the mean-squared-error loss.
import numpy as np
import matplotlib.pyplot as plt

# Synthetic data: y = 12 + 5*x + Gaussian noise, 100 samples.
X = 2 * np.random.rand(100, 1)  # 100x1 feature matrix
y = 12 + 5 * X + np.random.randn(100, 1)

plt.plot(X, y, 'b.')
plt.xlabel("X_1")
plt.ylabel("y")
plt.axis([0, 2, 10, 20])
plt.show()

eta = 0.01           # learning rate
n_iterations = 100   # number of full-batch updates
number_data = len(X)
X_b = np.c_[np.ones((number_data, 1)), X]  # prepend a bias column of ones

# Randomly initialize the model parameters (bias, slope).
theta = np.random.randn(2, 1)
cost_history = []
for iteration in range(n_iterations):
    residual = y - X_b.dot(theta)
    # Loss J = (1/m) * ||r||^2, consistent with the 2/m gradient below.
    # (The original recorded 1/(2m)*||r||^2, which is the loss whose
    # gradient is (1/m)X^T r — inconsistent with the update actually used.)
    cost = residual.T.dot(residual)[0][0] / number_data
    cost_history.append(cost)
    # Gradient of J: (2/m) * X^T (X theta - y).
    gradient = 2 / number_data * X_b.T.dot(X_b.dot(theta) - y)
    # Parameter update.
    theta = theta - eta * gradient

plt.plot(range(len(cost_history)), cost_history)
plt.show()
2.2随机梯度下降法(Stochastic Gradient Descent,SGD)
# Stochastic gradient descent: each update uses ONE randomly drawn sample.
import numpy as np
import matplotlib.pyplot as plt

# Synthetic data: y = 12 + 5*x + Gaussian noise, 100 samples.
X = 2 * np.random.rand(100, 1)  # 100x1 feature matrix
y = 12 + 5 * X + np.random.randn(100, 1)

plt.plot(X, y, 'b.')
plt.xlabel("X_1")
plt.ylabel("y")
plt.axis([0, 2, 10, 20])
plt.show()

number_data = len(X)
X_b = np.c_[np.ones((number_data, 1)), X]  # prepend a bias column of ones
theta = np.random.randn(2, 1)              # random initial parameters


def learning_schedule(t):
    """Return the learning rate at step t: large at first, then decaying.

    :param t: global step counter (grows with every parameter update)
    :return: learning rate t0 / (t1 + t)
    """
    t0 = 5
    t1 = 50
    return t0 / (t1 + t)


n_epochs = 50        # number of passes over the data
m = len(X_b)         # number of samples
theta_path_sgd = []  # history of theta updates
cost_path_sgd = []   # history of the loss value

for epoch in range(n_epochs):
    for i in range(m):
        # Draw one sample at random (with replacement).
        random_index = np.random.randint(m)
        xi = X_b[random_index:random_index + 1]
        yi = y[random_index:random_index + 1]
        gradient = 2 * xi.T.dot(xi.dot(theta) - yi)
        # Fix: the step counter is epoch*m + i, not n_epochs*m + i.
        # The original passed the constant n_epochs, so the schedule never
        # advanced across epochs and the learning rate did not decay.
        eta = learning_schedule(epoch * m + i)
        theta = theta - eta * gradient
        theta_path_sgd.append(theta)
        # Record the full-data squared-error loss for monitoring.
        cost = (y - X_b.dot(theta)).T.dot((y - X_b.dot(theta)))[0][0]
        cost_path_sgd.append(cost)

plt.plot(range(len(cost_path_sgd)), cost_path_sgd)
plt.show()
2.3 小批量梯度下降(Mini-Batch Gradient Descent, MBGD)
# Mini-batch gradient descent: each update uses a small random batch.
import numpy as np
import matplotlib.pyplot as plt

# Synthetic data: y = 12 + 5*x + Gaussian noise, 100 samples.
X = 2 * np.random.rand(100, 1)  # 100x1 feature matrix
y = 12 + 5 * X + np.random.randn(100, 1)

plt.plot(X, y, 'b.')
plt.xlabel("X_1")
plt.ylabel("y")
plt.axis([0, 2, 10, 20])
plt.show()

# eta = 0.01
# n_iterations = 100
number_data = len(X)
X_b = np.c_[np.ones((number_data, 1)), X]  # prepend a bias column of ones


def learning_schedule(t):
    """Return the learning rate at step t: starts large, decays over time.

    :param t: global step counter
    :return: learning rate t0 / (t1 + t)
    """
    t0, t1 = 5, 50
    return t0 / (t1 + t)


n_epochs = 50                  # number of passes over the data
minibatch = 16                 # batch size
theta = np.random.randn(2, 1)  # random initial parameters
t = 0                          # global step counter for the schedule
m = len(X_b)
cost_path_mgd = []             # history of the loss value
theta_path_mgd = []            # history of theta updates

for epoch in range(n_epochs):
    # Reshuffle the data once per epoch, then walk it in batches.
    shuffled_indices = np.random.permutation(m)
    X_b_shuffled = X_b[shuffled_indices]
    y_shuffled = y[shuffled_indices]
    for i in range(0, m, minibatch):
        t += 1
        xi = X_b_shuffled[i:i + minibatch]
        yi = y_shuffled[i:i + minibatch]
        # Gradient averaged over the batch: (2/b) * x^T (x theta - y).
        gradients = 2 / minibatch * xi.T.dot(xi.dot(theta) - yi)
        theta = theta - learning_schedule(t) * gradients
        theta_path_mgd.append(theta)
        # Record the full-data squared-error loss for monitoring.
        residual = y - X_b.dot(theta)
        cost_path_mgd.append(residual.T.dot(residual)[0][0])

plt.plot(range(len(cost_path_mgd)), cost_path_mgd)
plt.show()
