k-NN回顾
k-NN基本实现
import numpy as np
import pandas as pd
## Load the data
# Directory holding the pickled batch files ('data_batch_1'..'data_batch_5'
# and 'test_batch'); each batch is indexed below by its 'data' and 'labels'
# entries.
root = '../cifar-10-batches-py/'

# Training inputs: one flattened 32*32*3 image per row.
Xtr = np.zeros((50000, 32*32*3))
# Labels are class indices, so keep them as integers: np.bincount (used by the
# classifier's majority vote) does not accept floating-point input.
ytr = np.zeros((50000, 1), dtype=np.int64)
for i in range(1, 6):
    # Read each batch file once and reuse it for both inputs and labels.
    # The path is passed positionally because the keyword name of this
    # argument differs between pandas versions.
    batch = pd.read_pickle(root + 'data_batch_' + str(i))
    rows = slice((i - 1) * 10000, i * 10000)
    Xtr[rows, :] = batch['data']
    ytr[rows] = np.reshape(batch['labels'], (10000, 1))

test_batch = pd.read_pickle(root + 'test_batch')
Xte = test_batch['data']
yte = np.reshape(test_batch['labels'], (10000, 1))
## 定义KNN
class kNN(object):
    """k-nearest-neighbour classifier using the L1 (sum of absolute differences) distance."""

    def __init__(self, X, y):
        """Store the training set.

        X: training inputs, one flattened sample per row.
        y: training labels as a numpy array with one row per sample; the
           label is read from the first column (e.g. shape (n_samples, 1)).
        """
        self.Xtr = X
        self.ytr = y

    def forward(self, Xte, K):
        """Predict a label for every row of Xte by majority vote of its K nearest training samples.

        Xte: test inputs, one flattened sample per row.
        K: the k of kNN (number of neighbours that vote); must be >= 1.

        Returns an array of shape (Xte.shape[0], K) with dtype self.ytr.dtype.
        Every column of a row holds the same predicted label; the redundant
        width is kept so existing callers that rely on this shape keep working.
        Ties in the vote are resolved in favour of the smallest label.

        Raises ValueError if K is smaller than 1.
        """
        if K < 1:
            raise ValueError('K must be at least 1, got %r' % (K,))

        # np.bincount only accepts non-negative integers, so labels that were
        # stored as floats are converted once, outside the loop.
        labels = np.asarray(self.ytr)
        labels = labels.reshape(labels.shape[0], -1)[:, 0].astype(np.int64)

        Ypred = np.zeros((Xte.shape[0], K), dtype=self.ytr.dtype)
        for row in range(Xte.shape[0]):
            # Cast the query to float64 so the subtraction is carried out in
            # floating point; with two unsigned arrays (e.g. uint8 pixels) the
            # difference would wrap around and give wrong distances.
            query = np.asarray(Xte[row, :], dtype=np.float64)
            # Distance between the query and every training sample.
            distances = np.sum(np.abs(self.Xtr - query), axis=1)
            nearest = np.argsort(distances)[0:K]
            # Most frequent class among the K nearest neighbours.
            Ypred[row] = np.argmax(np.bincount(labels[nearest]))
        return Ypred
# Evaluate a 5-nearest-neighbour classifier on the first N test samples
# and print the fraction of predictions that match the true labels.
N = 10
nn = kNN(Xtr, ytr)
Yte_predict = nn.forward(Xte[:N, :], 5)
print('ACC:%f' % (np.mean(Yte_predict == yte[:N]),))
ACC:0.500000
!jupyter nbconvert --to markdown k-NN.ipynb
[NbConvertApp] Converting notebook k-NN.ipynb to markdown
[NbConvertApp] Support files will be in k-NN_files\
[NbConvertApp] Making directory k-NN_files
[NbConvertApp] Writing 4017 bytes to k-NN.md