Data Description

The MNIST dataset is available at http://yann.lecun.com/exdb/mnist/ and consists of four parts (a sketch for reading the raw files directly follows this list):

  • Training set images: train-images-idx3-ubyte.gz (9.9 MB, 47 MB uncompressed, 60,000 samples)
  • Training set labels: train-labels-idx1-ubyte.gz (29 KB, 60 KB uncompressed, 60,000 labels)
  • Test set images: t10k-images-idx3-ubyte.gz (1.6 MB, 7.8 MB uncompressed, 10,000 samples)
  • Test set labels: t10k-labels-idx1-ubyte.gz (5 KB, 10 KB uncompressed, 10,000 labels)
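These files are stored in the IDX binary format: a magic number encoding the element type and the number of dimensions, then the dimension sizes as big-endian 32-bit integers, then the raw bytes. The code on this page simply loads the data through keras.datasets.mnist, but as a minimal sketch (assuming the four files above have been downloaded and gunzipped into the working directory), the format can also be parsed by hand:

```python
import struct

import numpy as np


def read_idx(path):
    """Parse one IDX file: magic number, big-endian dims, raw ubyte data."""
    with open(path, 'rb') as f:
        # Magic number: two zero bytes, an element-type code
        # (0x08 = unsigned byte), and the number of dimensions
        _zeros, dtype, ndim = struct.unpack('>HBB', f.read(4))
        shape = struct.unpack('>' + 'I' * ndim, f.read(4 * ndim))
        return np.frombuffer(f.read(), dtype=np.uint8).reshape(shape)


images = read_idx('train-images-idx3-ubyte')  # shape (60000, 28, 28)
labels = read_idx('train-labels-idx1-ubyte')  # shape (60000,)
```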

The MNIST dataset comes from the US National Institute of Standards and Technology (NIST). The training set consists of digits handwritten by 250 different people: 50% high-school students and 50% Census Bureau employees. The test set contains handwritten digits collected in the same proportions.

Basic Keras Implementation

```python
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder


def load_data():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Flatten each 28x28 image into a 784-dimensional vector
    x_train = x_train.reshape(x_train.shape[0], 28 * 28)
    x_test = x_test.reshape(x_test.shape[0], 28 * 28)
    # One-hot encode the labels; .toarray() converts the sparse matrix
    # returned by OneHotEncoder into a dense array that Keras accepts
    y_train = OneHotEncoder().fit_transform(y_train.reshape(-1, 1)).toarray()
    y_test = OneHotEncoder().fit_transform(y_test.reshape(-1, 1)).toarray()
    return (x_train, y_train), (x_test, y_test)


def build_model():
    model = Sequential()
    model.add(Dense(128, activation='relu', input_shape=(784,)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


# Plot training and validation accuracy and loss over the training run
def draw_train_history(history):
    plt.figure(1)
    # summarize history for accuracy
    plt.subplot(211)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'])
    # summarize history for loss
    plt.subplot(212)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'])
    plt.show()


if __name__ == '__main__':
    (x_train, y_train), (x_test, y_test) = load_data()
    model = build_model()
    history = model.fit(x_train, y_train,
                        epochs=20,
                        batch_size=64,
                        validation_split=0.3)
    draw_train_history(history)
    model.save("classification.h5")
    loss, accuracy = model.evaluate(x_test, y_test)
    print("test loss: {}, test accuracy: {}".format(loss, accuracy))
    weights = model.get_weights()
    print("weights: ", weights)
```

Model Structure

[Figure 1: model architecture diagram]
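The original diagram is not reproduced here. Assuming pydot and graphviz are installed, an equivalent diagram can be regenerated with Keras's plot_model utility:

```python
from keras.utils import plot_model

# Render the layer graph to an image file; show_shapes adds tensor shapes
plot_model(model, to_file='classification.png', show_shapes=True)
```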

Model Output

```
test loss: 0.13618469612854534, test accuracy: 0.972000002861023
```

Model Loss and Accuracy Curves

[Figure 2: training/validation accuracy and loss curves]

Ensemble Learning with Keras and scikit-learn

```python
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import VotingClassifier


def load_data():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(x_train.shape[0], 28 * 28)
    x_test = x_test.reshape(x_test.shape[0], 28 * 28)
    # LabelEncoder expects 1-D input and returns integer class labels;
    # KerasClassifier one-hot encodes them internally when the loss
    # is categorical_crossentropy
    y_train = LabelEncoder().fit_transform(y_train)
    y_test = LabelEncoder().fit_transform(y_test)
    return (x_train, y_train), (x_test, y_test)


# Plot training and validation accuracy and loss over the training run
def draw_train_history(history):
    plt.figure(1)
    # summarize history for accuracy
    plt.subplot(211)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'])
    # summarize history for loss
    plt.subplot(212)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'])
    plt.show()


# Generic builder: one hidden layer per entry in hidden_units
def build_model(hidden_units):
    model = Sequential()
    for index, unit in enumerate(hidden_units):
        if index == 0:
            model.add(Dense(unit, activation='relu', input_shape=(784,)))
        else:
            model.add(Dense(unit, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


def build_model1():
    return build_model([128, 64])


def build_model2():
    return build_model([64, 32])


def build_model3():
    return build_model([32, 16])


if __name__ == '__main__':
    (x_train, y_train), (x_test, y_test) = load_data()
    model1 = KerasClassifier(build_fn=build_model1, epochs=20, batch_size=64)
    model1._estimator_type = "classifier"  # so VotingClassifier accepts the wrapper
    model2 = KerasClassifier(build_fn=build_model2, epochs=20, batch_size=64)
    model2._estimator_type = "classifier"
    model3 = KerasClassifier(build_fn=build_model3, epochs=20, batch_size=64)
    model3._estimator_type = "classifier"
    # if 'hard', uses predicted class labels for majority rule voting.
    # if 'soft', predicts the class label based on the argmax of the
    # sums of the predicted probabilities,
    # which is recommended for an ensemble of well-calibrated classifiers.
    cls = VotingClassifier(estimators=[('model1', model1),
                                       ('model2', model2),
                                       ('model3', model3)],
                           voting='hard')
    cls.fit(x_train, y_train)
    print("score: ", cls.score(x_test, y_test))
```

Note the voting parameter of VotingClassifier (a toy example follows this list):

  • hard: majority voting over the predicted class labels
  • soft: the class label is the argmax of the sums of the predicted probabilities; this is the recommended option for an ensemble of well-calibrated classifiers
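The difference is easiest to see on a toy example. This is a minimal sketch, independent of the MNIST code above, with made-up probabilities for three classifiers and three classes:

```python
import numpy as np

# Predicted class probabilities for one sample:
# one row per classifier, one column per class
probas = np.array([[0.45, 0.40, 0.15],   # classifier 1 votes class 0
                   [0.40, 0.45, 0.15],   # classifier 2 votes class 1
                   [0.40, 0.15, 0.45]])  # classifier 3 votes class 2

hard_votes = probas.argmax(axis=1)        # [0, 1, 2]: no majority, tie-break needed
soft_label = probas.sum(axis=0).argmax()  # argmax of [1.25, 1.00, 0.75] -> class 0
print(hard_votes, soft_label)
```

Soft voting resolves the three-way disagreement cleanly because classifiers 2 and 3 still assign substantial probability to class 0.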

Note that we are using VotingClassifier here, which in scikit-learn means combining each base learner's predictions into the final output through a voting strategy. The scikit-learn documentation describes it as:

Soft Voting/Majority Rule classifier for unfitted estimators.

Using StackingClassifier instead would give us the "learning" combination method implemented in the next section. The scikit-learn documentation describes it as follows; a usage sketch comes after the quote.

Stacked generalization consists in stacking the output of individual estimator and use a classifier to compute the final prediction. Stacking allows to use the strength of each individual estimator by using their output as input of a final estimator.
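As a hedged sketch of what that would look like with the three wrappers defined above (StackingClassifier requires scikit-learn >= 0.22, and the LogisticRegression meta-learner is an illustrative choice, not part of the original code):

```python
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

# model1/model2/model3 are the KerasClassifier wrappers from the script above;
# a logistic regression is trained on their predictions as the final estimator
stack = StackingClassifier(estimators=[('model1', model1),
                                       ('model2', model2),
                                       ('model3', model3)],
                           final_estimator=LogisticRegression())
stack.fit(x_train, y_train)
print("stacking score: ", stack.score(x_test, y_test))
```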

Model Output

```
score: 0.9711
```

Keras Ensemble Learning: the Learning (Stacking) Method

```python
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.models import Model
from keras.layers import Input, Dense, concatenate
from sklearn.preprocessing import OneHotEncoder


def load_data():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(x_train.shape[0], 28 * 28)
    x_test = x_test.reshape(x_test.shape[0], 28 * 28)
    # .toarray() turns the sparse one-hot matrix into a dense array for Keras
    y_train = OneHotEncoder().fit_transform(y_train.reshape(-1, 1)).toarray()
    y_test = OneHotEncoder().fit_transform(y_test.reshape(-1, 1)).toarray()
    return (x_train, y_train), (x_test, y_test)


# Plot training and validation accuracy and loss over the training run
def draw_train_history(history):
    plt.figure(1)
    # summarize history for accuracy
    plt.subplot(211)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'])
    # summarize history for loss
    plt.subplot(212)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'])
    plt.show()


def build_model():
    # Three parallel sub-networks read the same flattened image
    inputs = Input(shape=(784,))
    model1_1 = Dense(64, activation='relu')(inputs)
    model2_1 = Dense(128, activation='relu')(inputs)
    model3_1 = Dense(32, activation='relu')(inputs)
    model1_2 = Dense(32, activation='relu')(model1_1)
    model2_2 = Dense(64, activation='relu')(model2_1)
    model3_2 = Dense(16, activation='relu')(model3_1)
    model1_3 = Dense(16, activation='relu')(model1_2)
    model2_3 = Dense(32, activation='relu')(model2_2)
    model3_3 = Dense(8, activation='relu')(model3_2)
    # Concatenate the sub-networks' outputs; the final softmax layer
    # learns how to combine them
    con = concatenate([model1_3, model2_3, model3_3])
    output = Dense(10, activation='softmax')(con)
    model = Model(inputs=inputs, outputs=output)
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


if __name__ == '__main__':
    (x_train, y_train), (x_test, y_test) = load_data()
    model = build_model()
    history = model.fit(x_train, y_train,
                        epochs=20,
                        batch_size=64,
                        validation_split=0.3)
    draw_train_history(history)
    model.save("classification-learning-ensemble.h5")
    loss, accuracy = model.evaluate(x_test, y_test)
    print("test loss: {}, test accuracy: {}".format(loss, accuracy))
```

Model Structure

[Figure 3: model architecture diagram]

Model Output

```
test loss: 0.14912846596296658, test accuracy: 0.9682000279426575
```

Model Loss and Accuracy Curves

[Figure 4: training/validation accuracy and loss curves]

Code Location

https://github.com/Knowledge-Precipitation-Tribe/Neural-network/tree/master/code/Ensemble-Learning