Data Description
The MNIST dataset is available at http://yann.lecun.com/exdb/mnist/ and consists of four parts:
- Training set images: train-images-idx3-ubyte.gz (9.9 MB, 47 MB uncompressed, 60,000 samples)
- Training set labels: train-labels-idx1-ubyte.gz (29 KB, 60 KB uncompressed, 60,000 labels)
- Test set images: t10k-images-idx3-ubyte.gz (1.6 MB, 7.8 MB uncompressed, 10,000 samples)
- Test set labels: t10k-labels-idx1-ubyte.gz (5 KB, 10 KB uncompressed, 10,000 labels)
The MNIST dataset comes from the National Institute of Standards and Technology (NIST). The training set consists of digits handwritten by 250 different people, 50% of them high school students and 50% employees of the Census Bureau. The test set contains handwritten digits in the same proportions.
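For reference, the IDX format used by these files is simple enough to parse by hand. Below is a minimal sketch, assuming the four files have been downloaded and decompressed into the working directory; the magic numbers 2051 and 2049 come from the format specification on the page linked above.

```python
import numpy as np

def read_idx_images(path):
    with open(path, 'rb') as f:
        # Header: magic number, image count, rows, cols (big-endian int32)
        magic, num, rows, cols = np.frombuffer(f.read(16), dtype='>i4')
        assert magic == 2051, "not an IDX image file"
        return np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows, cols)

def read_idx_labels(path):
    with open(path, 'rb') as f:
        # Header: magic number, label count (big-endian int32)
        magic, num = np.frombuffer(f.read(8), dtype='>i4')
        assert magic == 2049, "not an IDX label file"
        return np.frombuffer(f.read(), dtype=np.uint8)

x_train = read_idx_images('train-images-idx3-ubyte')
y_train = read_idx_labels('train-labels-idx1-ubyte')
print(x_train.shape, y_train.shape)  # (60000, 28, 28) (60000,)
```

In the code below we skip this step and let keras.datasets.mnist download and load the data for us.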
Plain Keras Implementation
```python
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder


def load_data():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Flatten the 28x28 images into 784-dimensional vectors
    x_train = x_train.reshape(x_train.shape[0], 28 * 28)
    x_test = x_test.reshape(x_test.shape[0], 28 * 28)
    # One-hot encode the labels; .toarray() turns the sparse matrix
    # returned by OneHotEncoder into the dense array Keras expects
    y_train = OneHotEncoder().fit_transform(y_train.reshape(-1, 1)).toarray()
    y_test = OneHotEncoder().fit_transform(y_test.reshape(-1, 1)).toarray()
    return (x_train, y_train), (x_test, y_test)


def build_model():
    model = Sequential()
    model.add(Dense(128, activation='relu', input_shape=(784,)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


# Plot training and validation accuracy and loss over the epochs
def draw_train_history(history):
    plt.figure(1)
    # summarize history for accuracy
    plt.subplot(211)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'])
    # summarize history for loss
    plt.subplot(212)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'])
    plt.show()


if __name__ == '__main__':
    (x_train, y_train), (x_test, y_test) = load_data()
    model = build_model()
    history = model.fit(x_train, y_train,
                        epochs=20,
                        batch_size=64,
                        validation_split=0.3)
    draw_train_history(history)
    model.save("classification.h5")
    loss, accuracy = model.evaluate(x_test, y_test)
    print("test loss: {}, test accuracy: {}".format(loss, accuracy))
    weights = model.get_weights()
    print("weights: ", weights)
```
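Once training finishes, the saved classification.h5 can be reloaded for inference. A minimal sketch (the file name matches the model.save call above):

```python
import numpy as np
from keras.datasets import mnist
from keras.models import load_model

# Reload the model saved by the training script above
model = load_model("classification.h5")

(_, _), (x_test, y_test) = mnist.load_data()
x_test = x_test.reshape(x_test.shape[0], 28 * 28)

# Predict class probabilities for the first ten test digits
probs = model.predict(x_test[:10])
print("predicted:", np.argmax(probs, axis=1))
print("actual:   ", y_test[:10])
```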
Model Structure

Model Output
test loss: 0.13618469612854534, test accuracy: 0.972000002861023
Model Loss and Accuracy Curves

Keras + scikit-learn Ensemble Learning (Voting)
```python
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score


def load_data():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(x_train.shape[0], 28 * 28)
    x_test = x_test.reshape(x_test.shape[0], 28 * 28)
    # LabelEncoder expects a 1-D array, so pass the labels directly.
    # They stay as integers 0-9; KerasClassifier one-hot encodes them
    # internally because the loss is categorical_crossentropy.
    y_train = LabelEncoder().fit_transform(y_train)
    y_test = LabelEncoder().fit_transform(y_test)
    return (x_train, y_train), (x_test, y_test)


# Plot training and validation accuracy and loss over the epochs
def draw_train_history(history):
    plt.figure(1)
    # summarize history for accuracy
    plt.subplot(211)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'])
    # summarize history for loss
    plt.subplot(212)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'])
    plt.show()


# Generic builder: one hidden layer per entry in hidden_units
# (kept for reference; the three fixed builders below are used instead)
def build_model(hidden_units):
    model = Sequential()
    for index, unit in enumerate(hidden_units):
        if index == 0:
            model.add(Dense(unit, activation='relu', input_shape=(784,)))
        else:
            model.add(Dense(unit, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


def build_model1():
    model = Sequential()
    model.add(Dense(128, activation='relu', input_shape=(784,)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


def build_model2():
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(784,)))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


def build_model3():
    model = Sequential()
    model.add(Dense(32, activation='relu', input_shape=(784,)))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


if __name__ == '__main__':
    (x_train, y_train), (x_test, y_test) = load_data()
    # KerasClassifier does not expose _estimator_type, which sklearn's
    # VotingClassifier checks, so mark each wrapper as a classifier
    model1 = KerasClassifier(build_fn=build_model1, epochs=20, batch_size=64)
    model1._estimator_type = "classifier"
    model2 = KerasClassifier(build_fn=build_model2, epochs=20, batch_size=64)
    model2._estimator_type = "classifier"
    model3 = KerasClassifier(build_fn=build_model3, epochs=20, batch_size=64)
    model3._estimator_type = "classifier"
    # if 'hard', uses predicted class labels for majority rule voting.
    # if 'soft', predicts the class label based on the argmax of the
    # sums of the predicted probabilities, which is recommended for
    # an ensemble of well-calibrated classifiers.
    cls = VotingClassifier(estimators=[('model1', model1),
                                       ('model2', model2),
                                       ('model3', model3)],
                           voting='hard')
    cls.fit(x_train, y_train)
    print("score: ", cls.score(x_test, y_test))
```
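Note that the script imports cross_val_score but never calls it. It could be used for a quick cross-validated sanity check of a single base learner before ensembling; a sketch (cv=3 is an arbitrary choice, and each fold retrains the network from scratch, so this is slow):

```python
# Cross-validate one wrapped Keras model on the training set
scores = cross_val_score(model1, x_train, y_train, cv=3)
print("model1 cv accuracy: {:.4f} +/- {:.4f}".format(scores.mean(), scores.std()))
```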
Note the voting parameter of VotingClassifier:
- hard: plain majority voting on the predicted class labels
- soft: the predicted label is the argmax of the sum of the predicted probabilities; this is the recommended option for an ensemble of well-calibrated classifiers (see the toy example below)
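The difference is easy to see on a toy example (plain numpy, not part of the script above). Suppose three classifiers output the following probabilities for two classes:

```python
import numpy as np

# Each row: one classifier's predicted probabilities for classes [0, 1]
probs = np.array([[0.90, 0.10],    # votes 0, very confident
                  [0.40, 0.60],    # votes 1, barely
                  [0.45, 0.55]])   # votes 1, barely

# hard: count the predicted labels and take the majority
hard_winner = np.bincount(probs.argmax(axis=1)).argmax()  # -> 1

# soft: sum (or average) the probabilities first, then take the argmax
soft_winner = probs.mean(axis=0).argmax()                 # -> 0

print(hard_winner, soft_winner)
```

Hard voting picks class 1 (two votes out of three), while soft voting picks class 0 because the single confident classifier outweighs the two hesitant ones.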
Also note that we are using VotingClassifier, which in scikit-learn means the outputs of the base learners are combined into the final prediction through a voting strategy. The sklearn documentation describes it as follows:
Soft Voting/Majority Rule classifier for unfitted estimators.
If we used StackingClassifier instead, we would get the learning method (stacking) that the next section implements; a small sklearn sketch follows the model output below. The sklearn documentation describes it as follows:
Stacked generalization consists in stacking the output of individual estimator and use a classifier to compute the final prediction. Stacking allows to use the strength of each individual estimator by using their output as input of a final estimator.
Model Output
score: 0.9711
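For completeness, here is a hedged sketch of the scikit-learn stacking route. It assumes the three KerasClassifier wrappers and the data from the voting script above are still in scope, and it needs scikit-learn >= 0.22; LogisticRegression as the final estimator and cv=3 are illustrative choices, not from the original code. The next section builds the same stacking idea directly in Keras instead.

```python
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

# model1/model2/model3 and x_train/y_train come from the voting script
# above. StackingClassifier fits a final estimator on the cross-validated
# predictions of the base learners.
stack = StackingClassifier(
    estimators=[('model1', model1),
                ('model2', model2),
                ('model3', model3)],
    final_estimator=LogisticRegression(max_iter=1000),
    cv=3)
stack.fit(x_train, y_train)
print("stacking score:", stack.score(x_test, y_test))
```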
Keras Ensemble Learning: the Learning Method (Stacking)
```python
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.models import Model
from keras.layers import Input, Dense, concatenate
from sklearn.preprocessing import OneHotEncoder


def load_data():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(x_train.shape[0], 28 * 28)
    x_test = x_test.reshape(x_test.shape[0], 28 * 28)
    # One-hot encode the labels; .toarray() turns the sparse matrix
    # returned by OneHotEncoder into the dense array Keras expects
    y_train = OneHotEncoder().fit_transform(y_train.reshape(-1, 1)).toarray()
    y_test = OneHotEncoder().fit_transform(y_test.reshape(-1, 1)).toarray()
    return (x_train, y_train), (x_test, y_test)


# Plot training and validation accuracy and loss over the epochs
def draw_train_history(history):
    plt.figure(1)
    # summarize history for accuracy
    plt.subplot(211)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'])
    # summarize history for loss
    plt.subplot(212)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'])
    plt.show()


def build_model():
    inputs = Input(shape=(784,))
    # Three parallel sub-networks (the base learners) share one input
    model1_1 = Dense(64, activation='relu')(inputs)
    model2_1 = Dense(128, activation='relu')(inputs)
    model3_1 = Dense(32, activation='relu')(inputs)
    model1_2 = Dense(32, activation='relu')(model1_1)
    model2_2 = Dense(64, activation='relu')(model2_1)
    model3_2 = Dense(16, activation='relu')(model3_1)
    model1_3 = Dense(16, activation='relu')(model1_2)
    model2_3 = Dense(32, activation='relu')(model2_2)
    model3_3 = Dense(8, activation='relu')(model3_2)
    # Concatenate the branch outputs; the final softmax layer acts
    # as the combiner that learns how to weight the base learners
    con = concatenate([model1_3, model2_3, model3_3])
    output = Dense(10, activation='softmax')(con)
    model = Model(inputs=inputs, outputs=output)
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


if __name__ == '__main__':
    (x_train, y_train), (x_test, y_test) = load_data()
    model = build_model()
    history = model.fit(x_train, y_train,
                        epochs=20,
                        batch_size=64,
                        validation_split=0.3)
    draw_train_history(history)
    model.save("classification-learning-ensemble.h5")
    loss, accuracy = model.evaluate(x_test, y_test)
    print("test loss: {}, test accuracy: {}".format(loss, accuracy))
```
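The structure figure below can be regenerated with Keras's plot_model utility (it requires the pydot and graphviz packages; the output file name is an arbitrary choice):

```python
from keras.utils import plot_model

# Render the three-branch architecture to an image file
plot_model(model, to_file='learning-ensemble.png', show_shapes=True)
```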
Model Structure

Model Output
test loss: 0.14912846596296658, test accuracy: 0.9682000279426575
Model Loss and Accuracy Curves

Code Location
https://github.com/Knowledge-Precipitation-Tribe/Neural-network/tree/master/code/Ensemble-Learning
