    # Python ≥3.5 is required
    import sys
    assert sys.version_info >= (3, 5)

    # Scikit-Learn ≥0.20 is required
    import sklearn
    assert sklearn.__version__ >= "0.20"

    # Common imports
    import numpy as np
    import os

    # To make this notebook's output stable across runs
    np.random.seed(42)

    # To plot pretty figures
    %matplotlib inline
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    mpl.rc('axes', labelsize=14)
    mpl.rc('xtick', labelsize=12)
    mpl.rc('ytick', labelsize=12)

    # Where to save the figures
    PROJECT_ROOT_DIR = "."
    CHAPTER_ID = "training_linear_models"
    IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
    os.makedirs(IMAGES_PATH, exist_ok=True)  # needed by the save_fig() calls below

    def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
        path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
        print("Saving figure", fig_id)
        if tight_layout:
            plt.tight_layout()
        plt.savefig(path, format=fig_extension, dpi=resolution)

    # Ignore useless warnings (see SciPy issue #5998)
    import warnings
    warnings.filterwarnings(action="ignore", message="^internal gelsd")
    import numpy as np

    X = 2 * np.random.rand(100, 1)
    y = 4 + 3 * X + np.random.randn(100, 1)

    plt.plot(X, y, "b.")
    plt.xlabel("$x_1$", fontsize=18)
    plt.ylabel("$y$", rotation=0, fontsize=18)
    plt.axis([0, 2, 0, 16])
    # save_fig("generated_data_plot")
    plt.show()

(Figure: the randomly generated linear dataset)

1. Computing theta with the Normal Equation
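The Normal Equation gives the closed-form solution directly: $\hat{\theta} = (X^T X)^{-1} X^T y$, where $X$ is the training matrix with a bias column of 1s prepended (X_b below) and $y$ is the target vector.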

    X_b = np.c_[np.ones((100, 1)), X]  # add x0 = 1 to each instance
    X_b
    theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
    theta_best

    X_new = np.array([[0], [2]])
    X_new_b = np.c_[np.ones((2, 1)), X_new]  # add x0 = 1 to each instance
    y_predict = X_new_b.dot(theta_best)
    y_predict

    plt.plot(X_new, y_predict, 'r-')
    plt.scatter(X, y, c='k', s=6)
    plt.axis([0, 2, 0, 15])
    plt.show()

(Figure 4-2: Linear Regression model predictions)

2. Using Scikit-Learn

    from sklearn.linear_model import LinearRegression

    lin_reg = LinearRegression()
    lin_reg.fit(X, y)
    lin_reg.intercept_, lin_reg.coef_
    lin_reg.predict(X_new)

3. A quick implementation of Batch Gradient Descent

    eta = 0.1  # learning rate
    n_iterations = 1000
    m = 100

    theta = np.random.randn(2, 1)  # random initialization

    for iteration in range(n_iterations):
        gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)
        theta = theta - eta * gradients
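Each iteration computes the gradient of the MSE cost over the full training set, $\nabla_\theta \text{MSE}(\theta) = \frac{2}{m} X^T (X\theta - y)$, and then takes the step $\theta \leftarrow \theta - \eta \, \nabla_\theta \text{MSE}(\theta)$.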

4. A quick implementation of Stochastic Gradient Descent

    theta_path_sgd = []
    m = len(X_b)
    np.random.seed(42)

    n_epochs = 50
    t0, t1 = 5, 50  # learning schedule hyperparameters

    def learning_schedule(t):
        return t0 / (t + t1)

    theta = np.random.randn(2, 1)  # random initialization

    for epoch in range(n_epochs):
        for i in range(m):
            if epoch == 0 and i < 20:  # plot the first 20 steps of the first epoch
                y_predict = X_new_b.dot(theta)
                style = "b-" if i > 0 else "r--"
                plt.plot(X_new, y_predict, style)
            random_index = np.random.randint(m)  # pick one instance at random
            xi = X_b[random_index:random_index+1]
            yi = y[random_index:random_index+1]
            gradients = 2 * xi.T.dot(xi.dot(theta) - yi)
            eta = learning_schedule(epoch * m + i)  # gradually shrink the learning rate
            theta = theta - eta * gradients
            theta_path_sgd.append(theta)

    plt.plot(X, y, "b.")
    plt.xlabel("$x_1$", fontsize=18)
    plt.ylabel("$y$", rotation=0, fontsize=18)
    plt.axis([0, 2, 0, 15])
    save_fig("sgd_plot")
    plt.show()

(Figure 4-3: the first 20 steps of Stochastic Gradient Descent)

5. Polynomial Regression

Use PolynomialFeatures from sklearn.preprocessing to add new features, then fit the extended feature set with LinearRegression.

    import numpy as np
    import numpy.random as rnd

    np.random.seed(42)

    m = 100
    X = 6 * np.random.rand(m, 1) - 3
    y = 0.5 * X**2 + X + 2 + np.random.randn(m, 1)

    plt.plot(X, y, "b.")
    plt.xlabel("$x_1$", fontsize=18)
    plt.ylabel("$y$", rotation=0, fontsize=18)
    plt.axis([-3, 3, 0, 10])
    plt.show()

(Figure 4-4: the generated noisy quadratic dataset)

    from sklearn.preprocessing import PolynomialFeatures

    poly_features = PolynomialFeatures(degree=2, include_bias=False)
    X_poly = poly_features.fit_transform(X)
    X[0]

    lin_reg = LinearRegression()
    lin_reg.fit(X_poly, y)
    lin_reg.intercept_, lin_reg.coef_

    X_new = np.linspace(-3, 3, 100).reshape(100, 1)
    X_new_poly = poly_features.transform(X_new)
    y_new = lin_reg.predict(X_new_poly)

    plt.plot(X, y, "b.")
    plt.plot(X_new, y_new, "r-", linewidth=2, label="Predictions")
    plt.xlabel("$x_1$", fontsize=18)
    plt.ylabel("$y$", rotation=0, fontsize=18)
    plt.legend(loc="upper left", fontsize=14)
    plt.axis([-3, 3, 0, 10])
    plt.show()

(Figure 4-5: Polynomial Regression model predictions)
Now vary degree across 1, 2, and 300.

    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline

    for style, width, degree in (("g-", 1, 300), ("b--", 2, 2), ("r-+", 2, 1)):
        polybig_features = PolynomialFeatures(degree=degree, include_bias=False)
        std_scaler = StandardScaler()
        lin_reg = LinearRegression()
        polynomial_regression = Pipeline([
                ("poly_features", polybig_features),
                ("std_scaler", std_scaler),
                ("lin_reg", lin_reg),
            ])
        polynomial_regression.fit(X, y)
        y_newbig = polynomial_regression.predict(X_new)
        plt.plot(X_new, y_newbig, style, label=str(degree), linewidth=width)

    plt.plot(X, y, "b.", linewidth=3)
    plt.legend(loc="upper left")
    plt.xlabel("$x_1$", fontsize=18)
    plt.ylabel("$y$", rotation=0, fontsize=18)
    plt.axis([-3, 3, 0, 10])
    plt.show()

(Figure 4-6: Polynomial Regression with degrees 1, 2, and 300)

6. Learning Curves

Learning curves separate underfitting models from overfitting ones and illustrate the bias/variance trade-off; a sketch of how to plot them is shown below.
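A minimal sketch of the usual approach: train the model on ever-larger subsets of the training set and plot the RMSE on both the training subset and a fixed validation set (the helper name plot_learning_curves and the split sizes are illustrative):

    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split

    def plot_learning_curves(model, X, y):
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2,
                                                          random_state=42)
        train_errors, val_errors = [], []
        for m in range(1, len(X_train)):
            model.fit(X_train[:m], y_train[:m])          # train on the first m instances
            y_train_predict = model.predict(X_train[:m])
            y_val_predict = model.predict(X_val)          # evaluate on the full validation set
            train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
            val_errors.append(mean_squared_error(y_val, y_val_predict))
        plt.plot(np.sqrt(train_errors), "r-+", linewidth=2, label="train")
        plt.plot(np.sqrt(val_errors), "b-", linewidth=3, label="val")
        plt.legend(loc="upper right", fontsize=14)
        plt.xlabel("Training set size", fontsize=14)
        plt.ylabel("RMSE", fontsize=14)

    plot_learning_curves(LinearRegression(), X, y)
    plt.show()

A large gap between the two curves suggests overfitting; two high curves that converge suggest underfitting.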

7. Regularization

7.1 Ridge Regression
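Ridge Regression adds an $\ell_2$ penalty to the MSE cost, $J(\theta) = \text{MSE}(\theta) + \alpha \sum_{i=1}^{n} \theta_i^2$ (conventions differ by a factor of $\frac{1}{2}$, and the bias term $\theta_0$ is not regularized), so $\alpha$ controls how strongly the weights are shrunk.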

    from sklearn.linear_model import Ridge

    ridge_reg = Ridge(alpha=1, solver="cholesky", random_state=42)
    ridge_reg.fit(X, y)
    ridge_reg.predict([[1.5]])

7.2 Lasso Regression

An important characteristic of Lasso Regression is that it tends to eliminate the weights of the least important features entirely (i.e., set them to zero), so it effectively performs automatic feature selection; see the usage sketch below.
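A minimal usage sketch with sklearn's Lasso, which penalizes the $\ell_1$ norm of the weights (the alpha value is illustrative):

    from sklearn.linear_model import Lasso

    lasso_reg = Lasso(alpha=0.1)
    lasso_reg.fit(X, y)
    lasso_reg.predict([[1.5]])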

7.3 Elastic Net

Elastic Net is a middle ground between Ridge Regression and Lasso Regression: its regularization term is a simple mix of the Ridge and Lasso penalties, controlled by a mix ratio. In general Elastic Net is preferred over Lasso, because Lasso may behave erratically when the number of features exceeds the number of training instances or when several features are strongly correlated. In sklearn the model is ElasticNet; a sketch follows.
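A minimal sketch (l1_ratio corresponds to the mix ratio; the hyperparameter values are illustrative):

    from sklearn.linear_model import ElasticNet

    elastic_net = ElasticNet(alpha=0.1, l1_ratio=0.5, random_state=42)
    elastic_net.fit(X, y)
    elastic_net.predict([[1.5]])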

7.4 Early Stopping

For iterative algorithms such as Gradient Descent there is another, quite different way to regularize: stop training as soon as the validation error reaches its minimum. This is called early stopping; a sketch follows.
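A minimal sketch with SGDRegressor, assuming pre-scaled splits X_train_scaled, X_val_scaled, y_train, y_val already exist (hypothetical names); warm_start=True makes each fit() call continue from where the previous one left off:

    from copy import deepcopy
    from sklearn.linear_model import SGDRegressor
    from sklearn.metrics import mean_squared_error

    sgd_reg = SGDRegressor(max_iter=1, tol=None, warm_start=True, penalty=None,
                           learning_rate="constant", eta0=0.0005)

    minimum_val_error = float("inf")
    best_epoch, best_model = None, None
    for epoch in range(1000):
        sgd_reg.fit(X_train_scaled, y_train.ravel())  # one more epoch, continuing from before
        y_val_predict = sgd_reg.predict(X_val_scaled)
        val_error = mean_squared_error(y_val, y_val_predict)
        if val_error < minimum_val_error:  # keep the model with the lowest validation error
            minimum_val_error = val_error
            best_epoch = epoch
            best_model = deepcopy(sgd_reg)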

8. Logistic Regression

8.1 Softmax Regression

A Softmax Regression classifier predicts only one class at a time, so it should be used only with mutually exclusive classes; you cannot use it to recognize several people in one picture.
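A minimal sketch of Softmax Regression via sklearn's LogisticRegression in multinomial mode, using the iris dataset as a stand-in example (the hyperparameter C=10 is illustrative):

    from sklearn import datasets
    from sklearn.linear_model import LogisticRegression

    iris = datasets.load_iris()
    X_iris = iris["data"][:, (2, 3)]  # petal length, petal width
    y_iris = iris["target"]

    softmax_reg = LogisticRegression(multi_class="multinomial", solver="lbfgs",
                                     C=10, random_state=42)
    softmax_reg.fit(X_iris, y_iris)
    softmax_reg.predict([[5, 2]])        # predicted class
    softmax_reg.predict_proba([[5, 2]])  # estimated class probabilities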