数据集介绍

每个数字都是一张20×20灰度图像,展开之后每个样本对应一个400维度的向量,共有5000个样本,即5000×400的矩阵,标签10代表数字0。

读取并可视化数据

该数据集以.mat格式保存。mat格式用于保存matlab程序中的数据。包括文件头部分和数据部分,可以使用from scipy.io import loadmat来读取该文件。
image.png
读取完成后是X Y两个矩阵。X是5000×400,Y是5000×1,代表标签。

  1. # Setup the parameters you will use for this part of the exercise
  2. # 20x20 Input Imc c.reshape(2,8,order='F')= a.ravel(order='F')ages of Digits
  3. input_layer_size = 400
  4. # 10 labels, from 1 to 10
  5. num_labels = 10
  6. # =========== Part 1: Loading and Visualizing Data =============
  7. # 调用的是scipy.io import loadmat函数
  8. data = load_mat_data("ex3data1.mat")
  9. X = data['X']
  10. y = data['y']
  11. m = len(y)
  12. # Load Training Data
  13. print('Loading and Visualizing Data ...\n')
  14. # Randomly select 100 data points to display
  15. #生成一个0到5000的自然顺序向量
  16. shuffle_100_X = np.arange(0, m, 1, dtype=int)
  17. #此函数打乱了0到5000自然序
  18. np.random.shuffle(shuffle_100_X)
  19. #取前100即获取到了随机
  20. sel = X[shuffle_100_X[0:100], :]
  21. display_data(sel)
  22. print('Program paused. Press enter to continue.\n')
  1. def display_data(x, example_width=None):
  2. #默认情况下x-100,n=400
  3. m, n = x.shape
  4. # Set example_width automatically if not passed in
  5. # 如果example_width是空
  6. if not example_width:
  7. #np.round返回浮点数的四舍五入值
  8. example_width = int(np.round(np.sqrt(n)))
  9. example_height = int((n / example_width))
  10. # Compute number of items to display
  11. display_rows = int(np.floor(np.sqrt(m)))
  12. display_cols = int(np.ceil(m / display_rows))
  13. # Between images padding
  14. pad = 1
  15. # Setup blank display
  16. display_array = - np.ones((pad + display_rows * (example_height + pad),
  17. pad + display_cols * (example_width + pad)))
  18. # Copy each example into a patch on the display array
  19. curr_ex = 0
  20. for j in range(display_rows):
  21. for i in range(display_cols):
  22. if curr_ex > m:
  23. break
  24. # Get the max value of the patch
  25. max_val = np.max(np.abs(x[curr_ex, :]))
  26. wait_set_temp = np.reshape(x[curr_ex, :],
  27. (example_height, example_width), order='F') / max_val
  28. height_min_temp = pad + (j - 0) * (example_height + pad)
  29. height_max_temp = height_min_temp + example_height
  30. width_min_temp = pad + (i - 0) * (example_width + pad)
  31. width_max_temp = width_min_temp + example_width
  32. display_array[height_min_temp:height_max_temp, width_min_temp:width_max_temp] = wait_set_temp
  33. curr_ex = curr_ex + 1
  34. if curr_ex > m:
  35. break
  36. plt.ion()
  37. plt.imshow(display_array, cmap="gray") # 选一个漂亮的颜色
  38. plt.pause(1)

逻辑回归向量化(有正则项)

image.png

  1. #假设此处theta为4*1;x为5*4,即x为5个样本,4个特征;y为5*1,作为5个样本的标签值
  2. def lr_cost_function(theta, x, y, lr_lambda):
  3. m = len(y)
  4. sub1 = np.dot((-1 * y).T, np.log(sigmoid(np.dot(x, theta))))
  5. sub2 = np.dot((1 - y.T), np.log(1 - sigmoid(np.dot(x, theta))))
  6. j = (1 / m) * np.sum(sub1 - sub2)
  7. j = j + (lr_lambda / (2 * m)) * np.sum((theta[1:]) ** 2)
  8. grad = (1 / m) * np.dot(x.T, (sigmoid(np.dot(x, theta)) - y))
  9. grad = grad + (lr_lambda / m) * theta
  10. grad[0] = grad[0] - (lr_lambda / m) * theta[0]
  11. return j, grad

一对多分类

  1. def one_vs_all(x, y, num_labels, ova_lambda):
  2. # m=5000,n=400
  3. m, n = x.shape
  4. # 10个分类器的参数为10*401
  5. all_theta = np.zeros((num_labels, n + 1))
  6. # 源数据x加上1列
  7. x = np.append(np.ones((m, 1)), x, axis=1)
  8. for i in range(1, num_labels + 1):
  9. #第一个分类器为401维向量
  10. initial_theta = np.zeros((n + 1, 1))
  11. #属于第i类,将其标记为1,其余全为0
  12. y_temp = (y == i).astype(np.int32)
  13. #ova_lambda为学习率
  14. result = my_fminunc_lr(x, y_temp, initial_theta, ova_lambda)
  15. print('\nIteration: %4d' % result['nit'], ' | Cost: ', result['fun'])
  16. #将计算出的某一个分类器的theta添加到all_theta
  17. all_theta[i - 1, :] = result['x'].T
  18. return all_theta
  19. def predict_one_vs_all(all_theta, x):
  20. m = x.shape[0]
  21. #记录最终属于哪一类
  22. p = np.zeros((m, 1))
  23. #添加第一列为1
  24. x = np.append(np.ones((m, 1)), x, axis=1)
  25. #x*theta
  26. prob = np.dot(x, all_theta.T)
  27. for i in range(m):
  28. p[i] = np.argmax(prob[i, :])
  29. return p

神经网络前向传播

  1. def predict(theta1, theta2, x):
  2. m = x.shape[0]
  3. p = np.zeros((m, 1))
  4. print('\nSize of x : ', x.shape)
  5. x = np.append(np.ones((m, 1)), x, axis=1)
  6. a2 = sigmoid(np.dot(theta1, x.T))
  7. print('\nSize of a2 : ', a2.shape)
  8. a2 = np.append(np.ones((1, a2.shape[1])), a2, axis=0)
  9. a3 = sigmoid(np.dot(theta2, a2))
  10. print('\nSize of a3 : ', a3.shape)
  11. for i in range(m):
  12. p[i] = np.argmax(a3[:, i])
  13. return p + 1