1.准备数据

  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3. # Prepare data: 141 evenly spaced sample points from -1 to 6 (spacing 0.05)
  4. plot_x = np.linspace(-1., 6., 141)
  5. plot_x  # bare expression: echoes the array in an interactive session
  1. array([-1. , -0.95, -0.9 , -0.85, -0.8 , -0.75, -0.7 , -0.65, -0.6 ,
  2. -0.55, -0.5 , -0.45, -0.4 , -0.35, -0.3 , -0.25, -0.2 , -0.15,
  3. -0.1 , -0.05, 0. , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 ,
  4. 0.35, 0.4 , 0.45, 0.5 , 0.55, 0.6 , 0.65, 0.7 , 0.75,
  5. 0.8 , 0.85, 0.9 , 0.95, 1. , 1.05, 1.1 , 1.15, 1.2 ,
  6. 1.25, 1.3 , 1.35, 1.4 , 1.45, 1.5 , 1.55, 1.6 , 1.65,
  7. 1.7 , 1.75, 1.8 , 1.85, 1.9 , 1.95, 2. , 2.05, 2.1 ,
  8. 2.15, 2.2 , 2.25, 2.3 , 2.35, 2.4 , 2.45, 2.5 , 2.55,
  9. 2.6 , 2.65, 2.7 , 2.75, 2.8 , 2.85, 2.9 , 2.95, 3. ,
  10. 3.05, 3.1 , 3.15, 3.2 , 3.25, 3.3 , 3.35, 3.4 , 3.45,
  11. 3.5 , 3.55, 3.6 , 3.65, 3.7 , 3.75, 3.8 , 3.85, 3.9 ,
  12. 3.95, 4. , 4.05, 4.1 , 4.15, 4.2 , 4.25, 4.3 , 4.35,
  13. 4.4 , 4.45, 4.5 , 4.55, 4.6 , 4.65, 4.7 , 4.75, 4.8 ,
  14. 4.85, 4.9 , 4.95, 5. , 5.05, 5.1 , 5.15, 5.2 , 5.25,
  15. 5.3 , 5.35, 5.4 , 5.45, 5.5 , 5.55, 5.6 , 5.65, 5.7 ,
  16. 5.75, 5.8 , 5.85, 5.9 , 5.95, 6. ])

2.可视化

  1. plot_y = (plot_x-2.5)**2 - 1.  # loss curve: a parabola whose minimum value -1 is at x = 2.5
  2. plt.plot(plot_x, plot_y)
  3. plt.show()

image.png

3.迭代过程

  1. epsilon = 1e-8  # convergence tolerance: stop when J changes by less than this between steps
  2. eta = 0.1  # learning rate: multiplier applied to the gradient in each update
  def J(theta):
      """Loss function: the parabola (theta - 2.5)**2 - 1.

      Its minimum value is -1, reached at theta = 2.5. Works element-wise
      when ``theta`` is a NumPy array.
      """
      offset = theta - 2.5
      return offset ** 2 - 1.
  def dJ(theta):
      """Derivative of the loss (theta - 2.5)**2 - 1 with respect to theta."""
      offset = theta - 2.5
      return 2 * offset
  # Plain gradient descent from theta = 0: keep stepping against the gradient
  # until one step changes the loss by less than epsilon.
  theta = 0.0
  while True:
      last_theta = theta
      gradient = dJ(last_theta)
      theta = last_theta - eta * gradient
      if abs(J(theta) - J(last_theta)) < epsilon:
          break

  print(theta) # 2.499891109642585
  print(J(theta)) # -0.99999998814289
  # Same descent as before, but every visited theta is recorded so the path
  # can be drawn on top of the loss curve.
  theta = 0.0
  theta_history = [theta]
  while True:
      last_theta = theta
      gradient = dJ(last_theta)
      theta = last_theta - eta * gradient
      theta_history.append(theta)
      if abs(J(theta) - J(last_theta)) < epsilon:
          break

  # Loss curve first, then the visited points as red '+' markers.
  plt.plot(plot_x, J(plot_x))
  plt.plot(
      np.array(theta_history),
      J(np.array(theta_history)),
      color="r",
      marker='+',
  )
  plt.show()

image.png

  1. len(theta_history) # 46

4.封装代码

  theta_history = []


  def gradient_descent(initial_theta, eta, epsilon=1e-8):
      """Run gradient descent on J, recording every theta in ``theta_history``.

      Args:
          initial_theta: starting point; appended to the history first.
          eta: learning rate multiplying the gradient in each update.
          epsilon: stop once a step changes J by less than this amount.

      Returns:
          None. The visited values are appended to the module-level
          ``theta_history`` list.

      There is no iteration limit: the loop only ends when the convergence
      test passes, so a step size that makes the iterates diverge never
      terminates normally.
      """
      theta_history.append(initial_theta)
      theta = initial_theta
      while True:
          last_theta = theta
          theta = last_theta - eta * dJ(last_theta)
          theta_history.append(theta)
          if abs(J(theta) - J(last_theta)) < epsilon:
              return
  def plot_theta_history():
      """Plot the loss curve over ``plot_x`` and overlay the recorded thetas.

      The points stored in the module-level ``theta_history`` are drawn in
      red with '+' markers, then the figure is shown.
      """
      plt.plot(plot_x, J(plot_x))
      visited = np.array(theta_history)
      plt.plot(visited, J(visited), color="r", marker='+')
      plt.show()
  1. eta = 0.01  # ten times smaller than the 0.1 used earlier, so more steps are needed
  2. theta_history = []  # rebind the module-level list so this run starts with an empty history
  3. gradient_descent(0, eta)
  4. plot_theta_history()

image.png

  1. len(theta_history) # 424
  1. eta = 0.001  # even smaller learning rate: each step moves theta only slightly
  2. theta_history = []  # rebind the module-level list so this run starts with an empty history
  3. gradient_descent(0, eta)
  4. plot_theta_history()

image.png

  1. len(theta_history) # 3682

5.步伐略大

  1. eta = 0.8  # each update scales the distance to theta = 2.5 by 1 - 2*eta = -0.6: it jumps across the minimum but still converges
  2. theta_history = []  # rebind the module-level list so this run starts with an empty history
  3. gradient_descent(0, eta)
  4. plot_theta_history()

image.png

6.步伐太大

  1. eta = 1.1  # each update scales the distance to theta = 2.5 by 1 - 2*eta = -1.2, so the iterates diverge
  2. theta_history = []  # rebind the module-level list so this run starts with an empty history
  3. gradient_descent(0, eta) # OverflowError: (34, 'Result too large') -- raised once the squared term in J exceeds the float range
  def J(theta):
      """Evaluate the loss (theta - 2.5)**2 - 1, saturating to +inf on overflow.

      Args:
          theta: a number (or NumPy array, evaluated element-wise).

      Returns:
          The loss value, or ``float('inf')`` when squaring a Python float
          raises ``OverflowError`` because the result is too large.

      Only ``OverflowError`` is caught; any other error (for example a
      ``TypeError`` from a non-numeric argument) propagates to the caller
      instead of being silently reported as an infinite loss.
      """
      try:
          return (theta - 2.5) ** 2 - 1.
      except OverflowError:
          return float('inf')
  def gradient_descent(initial_theta, eta, n_iters = 1e4, epsilon=1e-8):
      """Run gradient descent on J with an upper bound on the number of steps.

      Args:
          initial_theta: starting point; appended to the history first.
          eta: learning rate multiplying the gradient in each update.
          n_iters: maximum number of update steps to perform.
          epsilon: stop early once a step changes J by less than this amount.

      Returns:
          None. The visited values are appended to the module-level
          ``theta_history`` list.
      """
      theta_history.append(initial_theta)
      theta = initial_theta
      steps_done = 0
      while steps_done < n_iters:
          last_theta = theta
          theta = last_theta - eta * dJ(last_theta)
          theta_history.append(theta)
          # When both losses are inf the difference is nan, which never
          # compares below epsilon, so a diverging run uses up all n_iters.
          if abs(J(theta) - J(last_theta)) < epsilon:
              return
          steps_done += 1
  1. eta = 1.1  # same diverging step size as before, now run with the guarded J and the iteration cap
  2. theta_history = []  # rebind the module-level list so this run starts with an empty history
  3. gradient_descent(0, eta)
  4. len(theta_history) # 10001: the initial theta plus one entry per iteration of the default n_iters = 1e4 (never converges)
  1. eta = 1.1  # diverging step size
  2. theta_history = []  # rebind the module-level list so this run starts with an empty history
  3. gradient_descent(0, eta, n_iters=10)  # cap at 10 updates so the first diverging steps can be plotted
  4. plot_theta_history()

image.png