From b80ddcbdb190efff892b634a46afe6a9a58bd31d Mon Sep 17 00:00:00 2001 From: LiuYuanchi Date: Mon, 1 Apr 2024 21:39:34 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9train.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 + datas/data.md | 25 +++++++++ train.py | 140 +++++++++++++++++++++++++++----------------------- 3 files changed, 104 insertions(+), 63 deletions(-) create mode 100644 datas/data.md diff --git a/.gitignore b/.gitignore index 90e3e68..68a4170 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ __pycache__ assets bag_data bag_data_msk +logs +checkpoints diff --git a/datas/data.md b/datas/data.md new file mode 100644 index 0000000..668107b --- /dev/null +++ b/datas/data.md @@ -0,0 +1,25 @@ +# 原始数据 +``` +train(model_name='FCN8s', epo_num=50, show_vgg_params=False, loss_func='bce', optimizer='sgd', lr=0.02, momentum=0.7) + +``` + +log1.txt +1.png +epoch train loss = 0.016117, epoch test loss = 0.290732, Time 00:00:27 + + +# 学习率对于FCN网络影响 + +``` +train(model_name='FCN8s', epo_num=11, show_vgg_params=False, loss_func='bce', optimizer='sgd', lr=0.01, momentum=0.7) +train(model_name='FCN8s', epo_num=11, show_vgg_params=False, loss_func='bce', optimizer='sgd', lr=0.02, momentum=0.7) +train(model_name='FCN8s', epo_num=11, show_vgg_params=False, loss_func='bce', optimizer='sgd', lr=0.03, momentum=0.7) +``` + +log2.txt +1.png + +epoch train loss = 0.066093, epoch test loss = 0.134848, Time 00:00:23 +epoch train loss = 0.048717, epoch test loss = 0.110684, Time 00:00:23 +epoch train loss = 0.047222, epoch test loss = 0.102705, Time 00:00:23 \ No newline at end of file diff --git a/train.py b/train.py index e7e3225..466d4e3 100644 --- a/train.py +++ b/train.py @@ -1,99 +1,101 @@ from datetime import datetime - import matplotlib.pyplot as plt import numpy as np import torch import torch.nn as nn import torch.optim as optim import visdom - from BagData import test_dataloader, train_dataloader from FCN import FCN8s, FCN16s, FCN32s, FCNs, VGGNet +def train(model_name='FCN8s', epo_num=50, show_vgg_params=False, loss_func='bce', optimizer='sgd', lr=1e-2, momentum=0.7): + vis = visdom.Visdom() + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -def train(epo_num=50, show_vgg_params=False): + # 选择不同的VGGNet和FCN模型 + vgg_model_options = {'VGG16': VGGNet(requires_grad=True, show_params=show_vgg_params), + 'VGG19': VGGNet(model='vgg19', requires_grad=True, show_params=show_vgg_params)} + fcn_model_options = {'FCN8s': FCN8s(pretrained_net=vgg_model_options['VGG16'], n_class=2), + 'FCN16s': FCN16s(pretrained_net=vgg_model_options['VGG16'], n_class=2), + 'FCN32s': FCN32s(pretrained_net=vgg_model_options['VGG16'], n_class=2)} - vis = visdom.Visdom() # 可视化工具 + # 根据输入选择模型 + if model_name in fcn_model_options: + fcn_model = fcn_model_options[model_name] + else: + raise ValueError(f"Invalid model name: {model_name}") + fcn_model = fcn_model.to(device) - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 使用gpu或cpu,若安装cuda默认使用gpu - """调用FCN.py中不同的vgg网络(自选一个),进行定量实验分析""" - vgg_model = VGGNet(requires_grad=True, show_params=show_vgg_params) # backbone网络,用于特征提取 - """调用FCN.py中不同的FCN网络(自选一个),进行定量实验分析""" - fcn_model = FCNs(pretrained_net=vgg_model, n_class=2) # Architecture结构,用于分割 - fcn_model = fcn_model.to(device) # 将模型加载至指定gpu - """更改损失函数为交叉熵nn.CELoss(), 进行定量实验分析""" - criterion = nn.BCELoss().to(device) - """更改模型优化器,如:adam等(自选一个),并调整学习率lr和momentum参数,进行定量实验分析""" - optimizer = optim.SGD(fcn_model.parameters(), lr=1e-2, momentum=0.7) + # 选择不同的损失函数 + if loss_func == 'bce': + criterion = nn.BCELoss().to(device) + elif loss_func == 'ce': + criterion = nn.CrossEntropyLoss().to(device) - all_train_iter_loss = [] # 训练loss存储list - all_test_iter_loss = [] # 测试loss存储list + # 选择不同的优化器和学习率、动量 + if optimizer == 'sgd': + opt = optim.SGD(fcn_model.parameters(), lr=lr, momentum=momentum) + elif optimizer == 'adam': + opt = optim.Adam(fcn_model.parameters(), lr=lr) - # start timing - prev_time = datetime.now() # 训练起始时间记录 - for epo in range(epo_num): # epo_num数据集训练次数 - - train_loss = 0 #初始化loss - fcn_model.train() # 打开训练模式 - for index, (bag, bag_msk) in enumerate(train_dataloader): # 加载数据,bag为数据样本,bag_msk为对应的二进制标签 - # bag.shape is torch.Size([4, 3, 160, 160]) - # bag_msk.shape is torch.Size([4, 2, 160, 160]) + all_train_iter_loss = [] + all_test_iter_loss = [] - bag = bag.to(device) #数据加载至device上 - bag_msk = bag_msk.to(device) #标签加载至device上 + prev_time = datetime.now() + for epo in range(epo_num): + train_loss = 0 + fcn_model.train() + for index, (bag, bag_msk) in enumerate(train_dataloader): + bag = bag.to(device) + bag_msk = bag_msk.to(device) - optimizer.zero_grad() #优化器梯度清空 - output = fcn_model(bag) # 模型forward - output = torch.sigmoid(output) # output.shape is torch.Size([4, 2, 160, 160]) - loss = criterion(output, bag_msk) # 计算模型预测与标签之间的损失 - loss.backward() # 反向传播 + opt.zero_grad() + output = fcn_model(bag) + output = torch.sigmoid(output) + loss = criterion(output, bag_msk) + loss.backward() iter_loss = loss.item() - all_train_iter_loss.append(iter_loss) #loss添加至存储的list里 - train_loss += iter_loss #训练集loss累加 - optimizer.step() # 模型参数优化更新 + all_train_iter_loss.append(iter_loss) + train_loss += iter_loss + opt.step() - output_np = output.cpu().detach().numpy().copy() # output_np.shape = (4, 2, 160, 160) + output_np = output.cpu().detach().numpy().copy() output_np = np.argmin(output_np, axis=1) - bag_msk_np = bag_msk.cpu().detach().numpy().copy() # bag_msk_np.shape = (4, 2, 160, 160) + bag_msk_np = bag_msk.cpu().detach().numpy().copy() bag_msk_np = np.argmin(bag_msk_np, axis=1) if np.mod(index, 15) == 0: - print('epoch {}, {}/{},train loss is {}'.format(epo, index, len(train_dataloader), iter_loss)) - # vis.close() - """结果可视化""" - vis.images(output_np[:, None, :, :], win='train_pred', opts=dict(title='train prediction')) + # print('epoch {}, {}/{},train loss is {}'.format(epo, index, len(train_dataloader), iter_loss)) + vis.images(output_np[:, None, :, :], win='train_pred', opts=dict(title='train prediction')) vis.images(bag_msk_np[:, None, :, :], win='train_label', opts=dict(title='label')) - vis.line(all_train_iter_loss, win='train_iter_loss',opts=dict(title='train iter loss')) - - test_loss = 0 - fcn_model.eval() # 模型打开测试模式 - with torch.no_grad(): # 测试阶段取消梯度 - for index, (bag, bag_msk) in enumerate(test_dataloader): + vis.line(all_train_iter_loss, win='train_iter_loss', opts=dict(title='train iter loss')) + test_loss = 0 + fcn_model.eval() + with torch.no_grad(): + for index, (bag, bag_msk) in enumerate(test_dataloader): bag = bag.to(device) bag_msk = bag_msk.to(device) - optimizer.zero_grad() + opt.zero_grad() output = fcn_model(bag) - output = torch.sigmoid(output) # output.shape is torch.Size([4, 2, 160, 160]) + output = torch.sigmoid(output) loss = criterion(output, bag_msk) iter_loss = loss.item() all_test_iter_loss.append(iter_loss) test_loss += iter_loss - output_np = output.cpu().detach().numpy().copy() # output_np.shape = (4, 2, 160, 160) + output_np = output.cpu().detach().numpy().copy() output_np = np.argmin(output_np, axis=1) - bag_msk_np = bag_msk.cpu().detach().numpy().copy() # bag_msk_np.shape = (4, 2, 160, 160) + bag_msk_np = bag_msk.cpu().detach().numpy().copy() bag_msk_np = np.argmin(bag_msk_np, axis=1) - + if np.mod(index, 15) == 0: - print(r'Testing... Open http://localhost:8097/ to see test result.') - # vis.close() - vis.images(output_np[:, None, :, :], win='test_pred', opts=dict(title='test prediction')) + # print(r'Testing... Open http://localhost:8097/ to see test result.') + vis.images(output_np[:, None, :, :], win='test_pred', opts=dict(title='test prediction')) vis.images(bag_msk_np[:, None, :, :], win='test_label', opts=dict(title='label')) vis.line(all_test_iter_loss, win='test_iter_loss', opts=dict(title='test iter loss')) - """显示模型运行时间""" cur_time = datetime.now() h, remainder = divmod((cur_time - prev_time).seconds, 3600) m, s = divmod(remainder, 60) @@ -101,14 +103,26 @@ def train(epo_num=50, show_vgg_params=False): prev_time = cur_time print('epoch train loss = %f, epoch test loss = %f, %s' - %(train_loss/len(train_dataloader), test_loss/len(test_dataloader), time_str)) - - """每5个epoch保存一次模型到checkpoints路径""" - if np.mod(epo, 5) == 0: - torch.save(fcn_model, 'checkpoints/fcn_model_{}.pt'.format(epo)) - print('saveing checkpoints/fcn_model_{}.pt'.format(epo)) + % (train_loss / len(train_dataloader), test_loss / len(test_dataloader), time_str)) + # 每10个训练轮次保存一次检查点 + if np.mod(epo, 10) == 0: + checkpoint_name = f'checkpoints/fcn_model_{model_name}_ep{epo}_lr{lr}_mom{momentum}.pt' + torch.save(fcn_model, checkpoint_name) + print(f'Saving checkpoint: {checkpoint_name}') if __name__ == "__main__": + # 调用训练函数,并修改不同的参数 + + ## 1 + #train(model_name='FCN8s', epo_num=11, show_vgg_params=False, loss_func='bce', optimizer='sgd', lr=0.02, momentum=0.7) - train(epo_num=100, show_vgg_params=False) #调用训练函数开始训练,训练epoch数为100 + + ## 2 + # train(model_name='FCN8s', epo_num=11, show_vgg_params=False, loss_func='bce', optimizer='sgd', lr=0.01, momentum=0.7) + # train(model_name='FCN8s', epo_num=11, show_vgg_params=False, loss_func='bce', optimizer='sgd', lr=0.02, momentum=0.7) + # train(model_name='FCN8s', epo_num=11, show_vgg_params=False, loss_func='bce', optimizer='sgd', lr=0.03, momentum=0.7) + train(model_name='FCN8s', epo_num=11, show_vgg_params=False, loss_func='bce', optimizer='sgd', lr=0.05, momentum=0.7) + + #train(model_name='FCN16s', epo_num=11, show_vgg_params=False, loss_func='ce', optimizer='adam', lr=1e-3) + #train(model_name='FCN32s', epo_num=100, show_vgg_params=False, loss_func='bce', optimizer='sgd', lr=0.01, momentum=0.5) \ No newline at end of file