数据集介绍
每个数字都是一张20×20灰度图像,展开之后每个样本对应一个400维度的向量,共有5000个样本,即5000×400的矩阵,标签10代表数字0。
读取并可视化数据
该数据集以.mat格式保存。mat格式用于保存matlab程序中的数据。包括文件头部分和数据部分,可以使用from scipy.io import loadmat来读取该文件。
读取完成后是X、Y两个矩阵。X是5000×400,Y是5000×1,代表标签。
# =========== Part 1: Loading and Visualizing Data =============
# Setup the parameters used for this part of the exercise.
# 20x20 input images of digits, unrolled to 400-dimensional feature vectors.
input_layer_size = 400
# 10 labels, from 1 to 10 (label 10 stands for digit 0)
num_labels = 10

# The dataset is a MATLAB .mat file; load_mat_data wraps scipy.io.loadmat.
data = load_mat_data("ex3data1.mat")
X = data['X']
y = data['y']
m = len(y)

# Load Training Data
print('Loading and Visualizing Data ...\n')

# Randomly select 100 data points to display:
# build the index vector 0..m-1, shuffle it in place, then take the first 100.
shuffle_100_X = np.arange(0, m, 1, dtype=int)
np.random.shuffle(shuffle_100_X)
sel = X[shuffle_100_X[0:100], :]
display_data(sel)
print('Program paused. Press enter to continue.\n')
def display_data(x, example_width=None):
    """Display the 2D data stored in x in a nice grid.

    Args:
        x: (m, n) array; each row is one flattened image, stored
           column-major (MATLAB order), hence order='F' on reshape.
        example_width: width in pixels of a single image; defaults to
           round(sqrt(n)), which is exact for square images.
    """
    m, n = x.shape
    # Set example_width automatically if not passed in
    if not example_width:
        example_width = int(np.round(np.sqrt(n)))
    example_height = int(n / example_width)
    # Compute number of items to display
    display_rows = int(np.floor(np.sqrt(m)))
    display_cols = int(np.ceil(m / display_rows))
    # Between-images padding (in pixels)
    pad = 1
    # Setup blank display; -1 renders darkest, forming the grid lines
    display_array = - np.ones((pad + display_rows * (example_height + pad),
                               pad + display_cols * (example_width + pad)))
    # Copy each example into a patch on the display array
    curr_ex = 0
    for j in range(display_rows):
        for i in range(display_cols):
            # BUG FIX: indices are 0-based, so curr_ex == m is already past
            # the last row; the original `curr_ex > m` read x[m, :] out of
            # range whenever the grid has more cells than samples.
            if curr_ex >= m:
                break
            # Normalize each patch by its own max so contrast is comparable
            max_val = np.max(np.abs(x[curr_ex, :]))
            if max_val == 0:
                max_val = 1  # avoid division by zero on an all-zero patch
            patch = np.reshape(x[curr_ex, :],
                               (example_height, example_width),
                               order='F') / max_val
            row_lo = pad + j * (example_height + pad)
            row_hi = row_lo + example_height
            col_lo = pad + i * (example_width + pad)
            col_hi = col_lo + example_width
            display_array[row_lo:row_hi, col_lo:col_hi] = patch
            curr_ex = curr_ex + 1
        if curr_ex >= m:
            break
    plt.ion()
    plt.imshow(display_array, cmap="gray")
    plt.pause(1)
逻辑回归向量化(有正则项)

def lr_cost_function(theta, x, y, lr_lambda):
    """Regularized logistic-regression cost and gradient.

    Example shapes: theta (4, 1); x (5, 4) — 5 samples, 4 features;
    y (5, 1) — the samples' labels.

    Args:
        theta: parameter vector, (n, 1).
        x: design matrix, (m, n).
        y: 0/1 label vector, (m, 1).
        lr_lambda: L2 regularization strength.

    Returns:
        (j, grad): scalar cost and (n, 1) gradient.
    """
    m = len(y)
    h = sigmoid(np.dot(x, theta))
    # Unregularized cross-entropy: -y'*log(h) - (1-y)'*log(1-h), averaged
    cost = np.sum(np.dot((-1 * y).T, np.log(h))
                  - np.dot((1 - y.T), np.log(1 - h))) / m
    # L2 penalty; the bias term theta[0] is excluded by convention
    cost = cost + (lr_lambda / (2 * m)) * np.sum((theta[1:]) ** 2)
    # Regularized gradient, then undo the penalty on the bias component
    grad = np.dot(x.T, (h - y)) / m + (lr_lambda / m) * theta
    grad[0] -= (lr_lambda / m) * theta[0]
    return cost, grad
一对多分类
def one_vs_all(x, y, num_labels, ova_lambda):
    """Train num_labels one-vs-all regularized logistic-regression classifiers.

    Args:
        x: (m, n) feature matrix (m=5000, n=400 for this exercise).
        y: (m, 1) labels in 1..num_labels.
        num_labels: number of classes.
        ova_lambda: regularization strength passed to the optimizer.

    Returns:
        all_theta: (num_labels, n + 1) — one parameter row per classifier.
    """
    # m = 5000 examples, n = 400 raw features
    m, n = x.shape
    # Parameters for all classifiers: 10 x 401
    all_theta = np.zeros((num_labels, n + 1))
    # Prepend the bias column of ones to x
    x = np.append(np.ones((m, 1)), x, axis=1)
    for i in range(1, num_labels + 1):
        # Each classifier starts from a 401-dim zero vector
        initial_theta = np.zeros((n + 1, 1))
        # One-vs-all targets: 1 where the sample belongs to class i, else 0
        y_temp = (y == i).astype(np.int32)
        # ova_lambda is the regularization strength
        result = my_fminunc_lr(x, y_temp, initial_theta, ova_lambda)
        print('\nIteration: %4d' % result['nit'], ' | Cost: ', result['fun'])
        # Store this classifier's optimized theta as one row of all_theta
        all_theta[i - 1, :] = result['x'].T
    return all_theta


def predict_one_vs_all(all_theta, x):
    """Predict a label in 1..num_labels for each row of x.

    Args:
        all_theta: (num_labels, n + 1) trained parameters, one row per class.
        x: (m, n) feature matrix without the bias column.

    Returns:
        p: (m, 1) predicted labels, 1-based.
    """
    m = x.shape[0]
    # Final predicted class per sample
    p = np.zeros((m, 1))
    # Prepend the bias column of ones
    x = np.append(np.ones((m, 1)), x, axis=1)
    # Score of every class for every sample
    prob = np.dot(x, all_theta.T)
    for i in range(m):
        # BUG FIX: np.argmax is 0-based while training used labels 1..num_labels
        # (y == i for i in 1..num_labels), so the prediction must be shifted by
        # +1 to compare against y — otherwise every label is off by one.
        p[i] = np.argmax(prob[i, :]) + 1
    return p
神经网络前向传播
def predict(theta1, theta2, x):
    """Forward-propagate x through a two-layer network and return labels.

    Args:
        theta1: weights of the hidden layer, (hidden_size, n + 1).
        theta2: weights of the output layer, (num_labels, hidden_size + 1).
        x: (m, n) feature matrix without the bias column.

    Returns:
        p: (m, 1) predicted labels in 1..num_labels.
    """
    m = x.shape[0]
    p = np.zeros((m, 1))
    print('\nSize of x : ', x.shape)
    # Input layer: prepend the bias column of ones
    a1 = np.append(np.ones((m, 1)), x, axis=1)
    # Hidden-layer activations, one column per example
    hidden = sigmoid(np.dot(theta1, a1.T))
    print('\nSize of a2 : ', hidden.shape)
    # Prepend the bias row before feeding the output layer
    hidden = np.append(np.ones((1, hidden.shape[1])), hidden, axis=0)
    output = sigmoid(np.dot(theta2, hidden))
    print('\nSize of a3 : ', output.shape)
    # Column-wise argmax picks the most probable class for each example
    p[:, 0] = np.argmax(output, axis=0)
    # Labels are 1-based (label 10 encodes digit 0), hence the +1 shift
    return p + 1
