血细胞分类项目
- 数据集:血细胞分类数据集
- 数据处理 dataset.py
- 网络 net.py
- 训练 train.py
- 拿训练集的几张图进行预测
数据集:血细胞分类数据集
https://aistudio.baidu.com/datasetdetail/10278
数据处理 dataset.py
from torchvision import transforms import torchvision import torch import matplotlib.pyplot as plt from PIL import Image #一、数据转换 train_transformer=transforms.Compose( [ transforms.RandomHorizontalFlip(0.2), transforms.RandomRotation(68), transforms.RandomGrayscale(0.2), transforms.Resize((256,256)), transforms.ToTensor(), transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5]) ] ) test_transformer=transforms.Compose( [ transforms.Resize((256,256)), transforms.ToTensor(), transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5]) ] ) #二、读入数据 train_dataset=torchvision.datasets.ImageFolder( 'E:/Jupytercode/血细胞分类/数据/blood-cells/dataset2-master/dataset2-master/images/TRAIN', transform=train_transformer ) test_dataset=torchvision.datasets.ImageFolder( 'E:/Jupytercode/血细胞分类/数据/blood-cells/dataset2-master/dataset2-master/images/TEST', transform=test_transformer ) #进行编码 #原 {'EOSINOPHIL': 0, 'LYMPHOCYTE': 1, 'MONOCYTE': 2, 'NEUTROPHIL': 3} #转换后 {0: 'EOSINOPHIL', 1: 'LYMPHOCYTE', 2: 'MONOCYTE', 3: 'NEUTROPHIL'} id_to_class={} for k,v in train_dataset.class_to_idx.items(): #print(k,v) id_to_class[v]=k #id_to_class #查看转换后的格式 #三、批次读入数据,可以作为神经网络的输入 一次性拿多少张图片进行训练 Batch_size=64#一次性训练64张 dl_train=torch.utils.data.DataLoader( train_dataset, batch_size=Batch_size, shuffle=True ) dl_test=torch.utils.data.DataLoader( test_dataset, batch_size=Batch_size, shuffle=True ) #取一个批次的数据 # img,label=next(iter(dl_train)) # plt.figure(figsize=(12,8)) # for i,(img,label) in enumerate(zip(img[:8],label[:8])): # img=(img.permute(1,2,0).numpy()+1)/2 # plt.subplot(2,4,i+1) # plt.title(id_to_class.get(label.item())) #0: 'EOSINOPHIL', 1: 'LYMPHOCYTE', 2: 'MONOCYTE', 3: 'NEUTROPHIL' # plt.imshow(img) # plt.show() #查看图片 print("数据处理已完成")
网络 net.py
import torch.nn as nn import torch #建立神经网络 class Net(nn.Module): # 模仿VGG def __init__(self): super(Net, self).__init__() self.layer1 = nn.Sequential( nn.Conv2d(3, 32, kernel_size=3), nn.ReLU(), nn.MaxPool2d(2, 2) ) self.layer2 = nn.Sequential( nn.Conv2d(32, 64, kernel_size=3), nn.ReLU(), nn.MaxPool2d(2, 2) ) self.layer3 = nn.Sequential( nn.Conv2d(64, 128, kernel_size=3), nn.ReLU(), nn.MaxPool2d(2, 2) ) self.layer4 = nn.Sequential( nn.Conv2d(128, 256, kernel_size=3), nn.ReLU(), nn.MaxPool2d(2, 2) ) self.fc = nn.Sequential( nn.Linear(256 * 14 * 14, 1024), nn.ReLU(), nn.Linear(1024, 4) ) def forward(self, x): x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) #print(x.shape) # 将原来的张量 x (四维)重新塑造为一个二维张量。第一个维度的大小由 PyTorch 自动计算,而第二个维度的大小被设置为 256 * 14 * 14 x = x.view(-1, 256 * 14 * 14) x = self.fc(x) return x if __name__ == '__main__': x = torch.rand([8, 3, 256, 256]) model = Net() y = model(x)
训练 train.py
import torch as t import torch.nn as nn from tqdm import tqdm #进度条 import net from dataset import * device = t.device("cuda") if t.cuda.is_available() else t.device("cpu") train_dataset=torchvision.datasets.ImageFolder( 'E:/Jupytercode/血细胞分类/数据/blood-cells/dataset2-master/dataset2-master/images/TRAIN', transform=train_transformer ) test_dataset=torchvision.datasets.ImageFolder( 'E:/Jupytercode/血细胞分类/数据/blood-cells/dataset2-master/dataset2-master/images/TEST', transform=test_transformer ) id_to_class={} for k,v in train_dataset.class_to_idx.items(): #print(k,v) id_to_class[v]=k Batch_size=64#一次性训练64张 dl_train=torch.utils.data.DataLoader( train_dataset, batch_size=Batch_size, shuffle=True ) dl_test=torch.utils.data.DataLoader( test_dataset, batch_size=Batch_size, shuffle=True ) model=net.Net() model = model.to(device) optim=torch.optim.Adam(model.parameters(),lr=0.001) loss_fn=nn.CrossEntropyLoss() def fit(epoch, model, trainloader, testloader): correct = 0 total = 0 running_loss = 0 model.train() # 训练模式下 识别normalize层 for x, y in tqdm(trainloader): x, y = x.to('cuda'), y.to('cuda') y_pred = model(x) loss = loss_fn(y_pred, y) optim.zero_grad() loss.backward() optim.step() with torch.no_grad(): y_pred = torch.argmax(y_pred, dim=1) correct += (y_pred == y).sum().item() total += y.size(0) running_loss += loss.item() epoch_loss = running_loss / len(trainloader.dataset) epoch_acc = correct / total test_correct = 0 test_total = 0 test_running_loss = 0 model.eval() # 验证模式下 不识别normalize层 with torch.no_grad(): for x, y in tqdm(testloader): x, y = x.to('cuda'), y.to('cuda') y_pred = model(x) loss = loss_fn(y_pred, y) y_pred = torch.argmax(y_pred, dim=1) test_correct += (y_pred == y).sum().item() test_total += y.size(0) test_running_loss += loss.item() epoch_test_loss = test_running_loss / len(testloader.dataset) epoch_test_acc = test_correct / test_total if epoch_acc > 0.95: model_state_dict = model.state_dict() torch.save(model_state_dict, './{}{}.pth'.format(epoch_acc, epoch_test_acc)) print('epoch: ', epoch, 'loss: ', round(epoch_loss, 3), 'accuracy:', round(epoch_acc, 3), 'test_loss: ', round(epoch_test_loss, 3), 'test_accuracy:', round(epoch_test_acc, 3) ) return epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc if __name__ == '__main__': epochs = 20 train_loss = [] train_acc = [] test_loss = [] test_acc = [] for epoch in range(epochs): epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc = fit(epoch, model, dl_train, dl_test) train_loss.append(epoch_loss) train_acc.append(epoch_acc) test_loss.append(epoch_test_loss) test_acc.append(epoch_test_acc) plt.plot(range(1, epochs + 1), train_loss, label='train_loss')# 绘制训练损失曲线,使用range(1, epochs+1)生成横坐标轴上的点,train_loss为纵坐标轴上的点 plt.plot(range(1, epochs + 1), test_loss, label='test_loss')# 绘制验证损失曲线,使用range(1, epochs+1)生成横坐标轴上的点,val_loss为纵坐标轴上的点 plt.legend()# 添加图例,label参数在前面的plot中设置,用于区分不同曲线 plt.xlabel('Epochs') # 设置横坐标轴的标签为'Epochs' plt.ylabel('Loss') # 设置纵坐标轴的标签为'Loss' plt.savefig('loss.png') plt.show() plt.plot(range(1, epochs + 1), train_acc, label='train_acc') plt.plot(range(1, epochs + 1), test_acc, label='test_acc') plt.title('Training and Validation Accuracy') # 可以添加标题 plt.xlabel('Epochs') # 为x轴添加标签 plt.ylabel('Accuracy') plt.legend() plt.savefig('acc.png') plt.show() torch.save(model,'Bloodcell.pkl') #保存模型训练权重
在深度学习中,模型通常具有两种运行模式:训练模式和验证/测试模式。这两种模式的主要区别在于模型的行为和参数更新方式。
- 训练模式(Training Mode):
在训练模式下,模型会执行以下操作:
①梯度计算: 计算模型参数关于损失函数的梯度,以便进行反向传播。
②参数更新: 根据梯度和优化算法,更新模型的参数以最小化损失函数。
③Dropout生效: 如果模型中使用了 Dropout 层,那么在训练模式下,Dropout 会生效,即在前向传播过程中会随机舍弃一些神经元,以防止过拟合。
在 PyTorch 中,通过 model.train() 将模型设置为训练模式:
model.train()
- 验证/测试模式(Validation/Testing Mode):
在验证/测试模式下,模型会执行以下操作:
①梯度计算: 不计算梯度,因为在验证/测试过程中不需要更新模型参数。
②Dropout不生效: 如果使用了 Dropout 层,那么在验证/测试模式下,Dropout 不生效,所有神经元都参与前向传播。
③评估模型性能: 使用模型进行预测,并评估模型在验证集或测试集上的性能。
在 PyTorch 中,通过 model.eval() 将模型设置为验证/测试模式:
model.eval()
切换模型的运行模式是为了确保在不同阶段使用正确的行为。在训练模式下,模型需要进行梯度计算和参数更新,而在验证/测试模式下,模型不需要进行参数更新,而是专注于性能评估。
拿训练集的几张图进行预测
预测pred.py
from dataset import * model=torch.load('Bloodcell.pkl') img,label=next(iter(dl_test)) #选取一些图片进行预测 img=img.to('cuda') model.eval() pred=model(img) pred_re=torch.argmax(pred, dim=1) pred_re=pred_re.cpu().numpy() pred_re=pred_re.tolist() for i in pred_re[0:8]: print(id_to_class[i]) id_to_class[pred_re[0:8][1]] plt.figure(figsize=(16,8)) img=img.cpu()#把图片重新放到CPU上 for i,(img,label) in enumerate(zip(img[:8],label[:8])): img=(img.permute(1,2,0).numpy()+1)/2 plt.subplot(2,4,i+1) pred_title=id_to_class[pred_re[0:8][i]] plt.title('R:{},P:{}'.format(id_to_class.get(label.item()),pred_title)) plt.imshow(img) plt.show()
- 训练模式(Training Mode):
猜你喜欢
网友评论
- 搜索
- 最新文章
- 热门文章