We have no easy way to be sure that our mathematically derived gradient formula is correct, so we use a gradient approximation that is slower to compute but much simpler to get right. Running gradient descent with this "debug" gradient gives reference parameter values we can compare against.
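
In symbols, the debug gradient approximates each partial derivative with a central difference (this is exactly what `dJ_debug` below implements):

$$
\frac{\partial J}{\partial \theta_i} \approx \frac{J(\theta_1, \dots, \theta_i + \varepsilon, \dots, \theta_n) - J(\theta_1, \dots, \theta_i - \varepsilon, \dots, \theta_n)}{2\varepsilon}
$$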

    import numpy as np
    import matplotlib.pyplot as plt

    # Prepare a synthetic data set: 1000 samples, 10 features,
    # generated from known parameters so the result can be checked
    np.random.seed(666)
    X = np.random.random(size=(1000, 10))
    true_theta = np.arange(1, 12, dtype=float)  # intercept plus 10 coefficients
    X_b = np.hstack([np.ones((len(X), 1)), X])
    y = X_b.dot(true_theta) + np.random.normal(size=1000)

    # Loss function (mean squared error)
    def J(theta, X_b, y):
        try:
            return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
        except Exception:
            return float('inf')

    # Analytic gradient, derived mathematically
    def dJ_math(theta, X_b, y):
        return X_b.T.dot(X_b.dot(theta) - y) * 2. / len(y)

    # Debug gradient: approximate each partial derivative with a
    # central difference; slower, but easy to get right
    def dJ_debug(theta, X_b, y, epsilon=0.01):
        res = np.empty(len(theta))
        for i in range(len(theta)):
            theta_1 = theta.copy()
            theta_1[i] += epsilon
            theta_2 = theta.copy()
            theta_2[i] -= epsilon
            res[i] = (J(theta_1, X_b, y) - J(theta_2, X_b, y)) / (2 * epsilon)
        return res

    # Batch gradient descent; dJ is passed in, so either gradient can be used
    def gradient_descent(dJ, X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
        theta = initial_theta
        cur_iter = 0
        while cur_iter < n_iters:
            gradient = dJ(theta, X_b, y)
            last_theta = theta
            theta = theta - eta * gradient
            if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                break
            cur_iter += 1
        return theta

    # Initialize the parameters
    X_b = np.hstack([np.ones((len(X), 1)), X])
    initial_theta = np.zeros(X_b.shape[1])
    eta = 0.01
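
The listing stops before actually running the two versions. A minimal sketch of the comparison step, continuing from the variables defined above (the variable names and printouts here are illustrative, not from the original notebook):

    # Run gradient descent with the slow-but-reliable debug gradient first
    theta_debug = gradient_descent(dJ_debug, X_b, y, initial_theta, eta)

    # Then run it with the analytic gradient and compare the two results
    theta_math = gradient_descent(dJ_math, X_b, y, initial_theta, eta)

    print(theta_debug)  # should roughly recover true_theta = [1, 2, ..., 11]
    print(np.max(np.abs(theta_debug - theta_math)))  # small if dJ_math is correct

In practice the debug gradient is only used to verify `dJ_math` on a small problem; once the two agree, the analytic gradient is used for real training because it is far faster.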
