数据集介绍

每个数字都是一张20×20灰度图像,展开之后每个样本对应一个400维度的向量,共有5000个样本,即5000×400的矩阵,标签10代表数字0。

读取并可视化数据

该数据集以.mat格式保存。mat格式用于保存matlab程序中的数据。包括文件头部分和数据部分,可以使用from scipy.io import loadmat来读取该文件。
image.png
读取完成后是X Y两个矩阵。X是5000×400,Y是5000×1,代表标签。

  1. # Setup the parameters you will use for this part of the exercise
  2. # 20x20 Input Imc c.reshape(2,8,order='F')= a.ravel(order='F')ages of Digits
  3. input_layer_size = 400
  4. # 10 labels, from 1 to 10
  5. num_labels = 10
  6. # =========== Part 1: Loading and Visualizing Data =============
  7. # 调用的是scipy.io import loadmat函数
  8. data = load_mat_data("ex3data1.mat")
  9. X = data['X']
  10. y = data['y']
  11. m = len(y)
  12. # Load Training Data
  13. print('Loading and Visualizing Data ...\n')
  14. # Randomly select 100 data points to display
  15. #生成一个0到5000的自然顺序向量
  16. shuffle_100_X = np.arange(0, m, 1, dtype=int)
  17. #此函数打乱了0到5000自然序
  18. np.random.shuffle(shuffle_100_X)
  19. #取前100即获取到了随机
  20. sel = X[shuffle_100_X[0:100], :]
  21. display_data(sel)
  22. print('Program paused. Press enter to continue.\n')
  1. def display_data(x, example_width=None):
  2. #默认情况下x-100,n=400
  3. m, n = x.shape
  4. # Set example_width automatically if not passed in
  5. # 如果example_width是空
  6. if not example_width:
  7. #np.round返回浮点数的四舍五入值
  8. example_width = int(np.round(np.sqrt(n)))
  9. example_height = int((n / example_width))
  10. # Compute number of items to display
  11. display_rows = int(np.floor(np.sqrt(m)))
  12. display_cols = int(np.ceil(m / display_rows))
  13. # Between images padding
  14. pad = 1
  15. # Setup blank display
  16. display_array = - np.ones((pad + display_rows * (example_height + pad),
  17. pad + display_cols * (example_width + pad)))
  18. # Copy each example into a patch on the display array
  19. curr_ex = 0
  20. for j in range(display_rows):
  21. for i in range(display_cols):
  22. if curr_ex > m:
  23. break
  24. # Get the max value of the patch
  25. max_val = np.max(np.abs(x[curr_ex, :]))
  26. wait_set_temp = np.reshape(x[curr_ex, :],
  27. (example_height, example_width), order='F') / max_val
  28. height_min_temp = pad + (j - 0) * (example_height + pad)
  29. height_max_temp = height_min_temp + example_height
  30. width_min_temp = pad + (i - 0) * (example_width + pad)
  31. width_max_temp = width_min_temp + example_width
  32. display_array[height_min_temp:height_max_temp, width_min_temp:width_max_temp] = wait_set_temp
  33. curr_ex = curr_ex + 1
  34. if curr_ex > m:
  35. break
  36. plt.ion()
  37. plt.imshow(display_array, cmap="gray") # 选一个漂亮的颜色
  38. plt.pause(1)

逻辑回归向量化(有正则项)

image.png

  1. #假设此处theta为4*1;x为5*4,即x为5个样本,4个特征;y为5*1,作为5个样本的标签值
  2. def lr_cost_function(theta, x, y, lr_lambda):
  3. m = len(y)
  4. sub1 = np.dot((-1 * y).T, np.log(sigmoid(np.dot(x, theta))))
  5. sub2 = np.dot((1 - y.T), np.log(1 - sigmoid(np.dot(x, theta))))
  6. j = (1 / m) * np.sum(sub1 - sub2)
  7. j = j + (lr_lambda / (2 * m)) * np.sum((theta[1:]) ** 2)
  8. grad = (1 / m) * np.dot(x.T, (sigmoid(np.dot(x, theta)) - y))
  9. grad = grad + (lr_lambda / m) * theta
  10. grad[0] = grad[0] - (lr_lambda / m) * theta[0]
  11. return j, grad

一对多分类

  1. def one_vs_all(x, y, num_labels, ova_lambda):
  2. # m=5000,n=400
  3. m, n = x.shape
  4. # 10个分类器的参数为10*401
  5. all_theta = np.zeros((num_labels, n + 1))
  6. # 源数据x加上1列
  7. x = np.append(np.ones((m, 1)), x, axis=1)
  8. for i in range(1, num_labels + 1):
  9. #第一个分类器为401维向量
  10. initial_theta = np.zeros((n + 1, 1))
  11. #属于第i类,将其标记为1,其余全为0
  12. y_temp = (y == i).astype(np.int32)
  13. #ova_lambda为学习率
  14. result = my_fminunc_lr(x, y_temp, initial_theta, ova_lambda)
  15. print('\nIteration: %4d' % result['nit'], ' | Cost: ', result['fun'])
  16. #将计算出的某一个分类器的theta添加到all_theta
  17. all_theta[i - 1, :] = result['x'].T
  18. return all_theta
  19. def predict_one_vs_all(all_theta, x):
  20. m = x.shape[0]
  21. #记录最终属于哪一类
  22. p = np.zeros((m, 1))
  23. #添加第一列为1
  24. x = np.append(np.ones((m, 1)), x, axis=1)
  25. #x*theta
  26. prob = np.dot(x, all_theta.T)
  27. for i in range(m):
  28. p[i] = np.argmax(prob[i, :])
  29. return p

神经网络前向传播

  1. def predict(theta1, theta2, x):
  2. m = x.shape[0]
  3. p = np.zeros((m, 1))
  4. print('\nSize of x : ', x.shape)
  5. x = np.append(np.ones((m, 1)), x, axis=1)
  6. a2 = sigmoid(np.dot(theta1, x.T))
  7. print('\nSize of a2 : ', a2.shape)
  8. a2 = np.append(np.ones((1, a2.shape[1])), a2, axis=0)
  9. a3 = sigmoid(np.dot(theta2, a2))
  10. print('\nSize of a3 : ', a3.shape)
  11. for i in range(m):
  12. p[i] = np.argmax(a3[:, i])
  13. return p + 1