什么是支撑向量机

SVM 尝试寻找一个最优的决策边界，使它到两个类别中最近的样本的距离最远；
这些最近的样本称作支撑向量（support vector）
SVM 最大化margin
线性可分 hard margin SVM
线性不可分 soft margin SVM

线性可分

  1. from sklearn import datasets
  2. import numpy as np
  3. from matplotlib import pyplot as plt
  4. iris = datasets.load_iris()
  5. X = iris.data
  6. y = iris.target
  7. X = X[y<2, :2]
  8. y = y[y<2]
  9. plt.scatter(X[y==0, 0], X[y==0, 1])
  10. plt.scatter(X[y==1, 0], X[y==1, 1])

image.png

  1. from sklearn.preprocessing import StandardScaler
  2. std = StandardScaler()
  3. X_STD = std.fit_transform(X)
  4. from sklearn.svm import LinearSVC
  5. svc = LinearSVC(C=1e9)
  6. svc.fit(X_STD, y)
  7. def plot_decision_boundary(model, axis):
  8. x0, x1 = np.meshgrid(
  9. np.linspace(axis[0], axis[1], int((axis[1]-axis[0])*100)).reshape(-1, 1),
  10. np.linspace(axis[2], axis[3], int((axis[3]-axis[2])*100)).reshape(-1, 1),
  11. )
  12. X_new = np.c_[x0.ravel(), x1.ravel()]
  13. y_predict = model.predict(X_new)
  14. zz = y_predict.reshape(x0.shape)
  15. from matplotlib.colors import ListedColormap
  16. custom_cmap = ListedColormap(['#EF9A9A','#FFF59D','#90CAF9'])
  17. plt.contourf(x0, x1, zz, linewidth=5, cmap=custom_cmap)
  18. plot_decision_boundary(svc, axis=[-3, 3, -3, 3])
  19. plt.scatter(X_STD[y==0, 0], X_STD[y==0, 1])
  20. plt.scatter(X_STD[y==1, 0], X_STD[y==1, 1])

image.png

  1. svc2 = LinearSVC(C=0.01)
  2. svc2.fit(X_STD, y)
  3. plot_decision_boundary(svc2, axis=[-3, 3, -3, 3])
  4. plt.scatter(X_STD[y==0, 0], X_STD[y==0, 1])
  5. plt.scatter(X_STD[y==1, 0], X_STD[y==1, 1])

image.png

多项式特征

  1. from sklearn import datasets
  2. from sklearn.preprocessing import PolynomialFeatures
  3. from sklearn.pipeline import Pipeline
  4. X, y = datasets.make_moons(noise=0.15, random_state=666)
  5. plt.scatter(X[y==0, 0], X[y==0, 1])
  6. plt.scatter(X[y==1, 0], X[y==1, 1])

image.png

  1. def PolynomialSVM(degree, C=1):
  2. return Pipeline([
  3. ("poly", PolynomialFeatures(degree=degree)),
  4. ("std", StandardScaler()),
  5. ("svm", LinearSVC(C=C))
  6. ])
  7. poly_svc = PolynomialSVM(3)
  8. poly_svc.fit(X, y)
  9. plot_decision_boundary(poly_svc, axis=[-1.5, 3, -1.5, 1.5])
  10. plt.scatter(X[y==0, 0], X[y==0, 1])
  11. plt.scatter(X[y==1, 0], X[y==1, 1])

image.png

多项式核

  1. from sklearn.svm import SVC
  2. def PolynomialKernelSVC(degree, C=1):
  3. return Pipeline([
  4. ('std', StandardScaler()),
  5. ('kernel_svc',SVC(kernel="poly", degree=degree, C=C))
  6. ])
  7. poly_kernel_svc = PolynomialKernelSVC(5, 1)
  8. poly_kernel_svc.fit(X, y)
  9. plot_decision_boundary(poly_kernel_svc, axis=[-1.5, 3, -1.5, 1.5])
  10. plt.scatter(X[y==0, 0], X[y==0, 1])
  11. plt.scatter(X[y==1, 0], X[y==1, 1])

image.png

高斯核函数

  1. def RBFKernelSVC(gamma=1):
  2. return Pipeline([
  3. ('std', StandardScaler()),
  4. ('svc', SVC(kernel='rbf', gamma=gamma))
  5. ])
  6. rbf_svc = RBFKernelSVC()
  7. rbf_svc.fit(X, y)
  8. plot_decision_boundary(rbf_svc, axis=[-1.5, 3, -1.5, 1.5])
  9. plt.scatter(X[y==0, 0], X[y==0, 1])
  10. plt.scatter(X[y==1, 0], X[y==1, 1])

image.png

SVR 回归模型（RBF 核）

  1. from sklearn.svm import SVR
  2. from sklearn.datasets import load_boston
  3. X = load_boston().data
  4. y = load_boston().target
  5. from sklearn.model_selection import train_test_split
  6. X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)
  7. def PolySVR(gamma):
  8. return Pipeline([
  9. ('std', StandardScaler()),
  10. ('svr', SVR(kernel='rbf', gamma=gamma))
  11. ])
  12. poly_svr = PolySVR(0.1)
  13. poly_svr.fit(X_train, y_train)
  14. from sklearn.metrics import mean_squared_error
  15. mean_squared_error(y_test, poly_svr.predict(X_test))
  16. poly_svr.score(X_test, y_test)