Lasso和Elastic Net(弹性网络)在稀疏信号上的表现

翻译者:@Loopy
校验者:@barrycg

本示例在人工生成并叠加了噪声的稀疏信号上,分别拟合 Lasso 回归模型和弹性网络(Elastic Net)回归模型,评估它们的表现,并将估计得到的回归系数与真实系数进行比较。

  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3. from sklearn.metrics import r2_score
  # Build a synthetic regression problem whose true coefficient vector is sparse.
  np.random.seed(42)  # fixed seed so the random draws below are reproducible

  n_samples, n_features = 50, 100
  X = np.random.randn(n_samples, n_features)

  # Coefficients decay exponentially in magnitude and alternate in sign
  # (+, -, +, ...); this shape is chosen for visualization purposes.
  idx = np.arange(n_features)
  signs = np.where(idx % 2 == 0, 1, -1)
  coef = signs * np.exp(-idx / 10)
  coef[10:] = 0  # sparsify: only the first 10 coefficients stay non-zero
  y = X.dot(coef)

  # Perturb the targets with a small amount of Gaussian noise.
  noise = np.random.normal(size=n_samples)
  y += 0.01 * noise

  # The first half of the rows is the training set, the second half the test set.
  n_samples = X.shape[0]
  half = n_samples // 2
  X_train, X_test = X[:half], X[half:]
  y_train, y_test = y[:half], y[half:]
  # Lasso: fit on the training half, then score the predictions made for the
  # held-out test half with the R^2 metric.
  from sklearn.linear_model import Lasso

  alpha = 0.1  # regularization strength; the ElasticNet fit further down reuses it
  lasso = Lasso(alpha=alpha)
  lasso.fit(X_train, y_train)
  y_pred_lasso = lasso.predict(X_test)
  r2_score_lasso = r2_score(y_test, y_pred_lasso)

  # Show the estimator configuration followed by its test-set score.
  print(lasso)
  print("r^2 on test data : %f" % r2_score_lasso)
  1. Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
  2. normalize=False, positive=False, precompute=False, random_state=None,
  3. selection='cyclic', tol=0.0001, warm_start=False)
  4. r^2 on test data : 0.658064
  # Elastic Net: same train/test protocol as the Lasso fit, using the same
  # `alpha` and an l1_ratio of 0.7.
  from sklearn.linear_model import ElasticNet

  enet = ElasticNet(alpha=alpha, l1_ratio=0.7)
  enet.fit(X_train, y_train)
  y_pred_enet = enet.predict(X_test)
  r2_score_enet = r2_score(y_test, y_pred_enet)

  # Show the estimator configuration followed by its test-set score.
  print(enet)
  print("r^2 on test data : %f" % r2_score_enet)
  1. ElasticNet(alpha=0.1, copy_X=True, fit_intercept=True, l1_ratio=0.7,
  2. max_iter=1000, normalize=False, positive=False, precompute=False,
  3. random_state=None, selection='cyclic', tol=0.0001, warm_start=False)
  4. r^2 on test data : 0.642515
  # Stem plot comparing the non-zero coefficients estimated by each model with
  # the non-zero ground-truth coefficients.
  # NOTE(review): the legend labels contain CJK characters; presumably a font
  # providing CJK glyphs has to be configured in Matplotlib for them to
  # render correctly -- confirm on the target environment.

  # Elastic Net estimates, drawn in green.
  enet_nz = np.flatnonzero(enet.coef_)
  markers, stems, _ = plt.stem(enet_nz, enet.coef_[enet_nz],
                               markerfmt='x', label='Elastic net系数')
  plt.setp([markers, stems], color="#2ca02c")

  # Lasso estimates, drawn in orange.
  lasso_nz = np.flatnonzero(lasso.coef_)
  markers, stems, _ = plt.stem(lasso_nz, lasso.coef_[lasso_nz],
                               markerfmt='x', label='Lasso系数')
  plt.setp([markers, stems], color='#ff7f0e')

  # Ground-truth coefficients, drawn with blue 'x' markers.
  true_nz = np.flatnonzero(coef)
  plt.stem(true_nz, coef[true_nz], label='真实系数', markerfmt='bx')

  plt.legend(loc='best')
  plt.title("Lasso $R^2$: %.3f, Elastic Net $R^2$: %.3f"
            % (r2_score_lasso, r2_score_enet))
  plt.show()

png