梯度下降 https://towardsdatascience.com/gradient-descent-algorithm-a-deep-dive-cf04e8115f21
梯度下降仅适用于可微的凸函数(convex function)

CNN架构

dbeb2b65-bc58-4691-bb4b-79bd034dcdcc.png

构建CNN模型

  1. 导入包
  2. 收集数据
  3. 准备数据
  4. 构建模型
  5. 训练模型
  6. 评估模型的准确性

导入库

  1. !pip install pytorch-lightning
  2. !pip install opendatasets
  3. import os
  4. import shutil
  5. import opendatasets as od
  6. import pandas as pd
  7. import numpy as np
  8. from PIL import Image
  9. from sklearn.metrics import confusion_matrix
  10. from sklearn.model_selection import train_test_split
  11. import matplotlib.pyplot as plt
  12. import torch
  13. from torch import nn, optim
  14. from torch.utils.data import DataLoader, Dataset
  15. from torch.utils.data.sampler import SubsetRandomSampler
  16. from torchvision.datasets import ImageFolder
  17. import torchvision.transforms as T
  18. from torchvision.utils import make_grid
  19. from torchmetrics.functional import accuracy
  20. import pytorch_lightning as pl

收集数据

  1. dataset_url = 'https://www.kaggle.com/c/histopathologic-cancer-detection'
  2. od.download(dataset_url)
  3. # 查看标签
  4. cancer_labels = pd.read_csv('histopathologic-cancer-detection/train_labels.csv')
  5. cancer_labels.head()
  6. # 从train文件夹中随机选择了10000张图像作为数据集
  7. np.random.seed(0)
  8. train_imgs_orig = os.listdir("histopathologic-cancer-detection/train")
  9. selected_image_list = []
  10. for img in np.random.choice(train_imgs_orig, 10000):
  11.     selected_image_list.append(img)
  12. len(selected_image_list)
  13. # 8000张训练集 2000张测试集
  14. np.random.seed(0)
  15. np.random.shuffle(selected_image_list)
  16. cancer_train_idx = selected_image_list[:8000]
  17. cancer_test_idx = selected_image_list[8000:]
  18. print("Number of images in the downsampled training dataset: ", len(cancer_train_idx))
  19. print("Number of images in the downsampled testing dataset: ", len(cancer_test_idx))

准备数据