下圖為LeNet的結構圖,
主要由2層Convolution Layer與3層Fully-Connected Layer所組成。
下面將呈現兩種撰寫LeNet網路結構的方式,以及MNIST手寫數字的訓練寫法。
補充:Conv2d()與Linear()的參數設置如下:
nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
nn.Linear(in_features, out_features, bias=True)
第一種寫法:
import torch.nn as nn
import torch.nn.functional as F


class LeNet(nn.Module):
    """LeNet written with the functional API: 2 conv layers + 3 linear layers.

    Only the layers that own parameters are registered as sub-modules; ReLU
    and max-pooling are applied through ``torch.nn.functional`` in
    ``forward``.  ``fc1`` takes 16*5*5 features, which is what the two
    unpadded 5x5 convolutions and 2x2 poolings produce for 3-channel 32x32
    inputs (32 -> 28 -> 14 -> 10 -> 5).
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)    # 3 input channels -> 6 maps, 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, 5)   # 6 maps -> 16 maps, 5x5 kernel
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return a ``(batch, 10)`` tensor of output scores for image batch ``x``."""
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten per sample.  Keeping the batch dimension explicit (rather
        # than view(-1, 16*5*5)) makes a wrongly sized input fail at fc1
        # instead of being silently regrouped into a different batch size.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
第二種寫法:
import torch.nn as nn


class LeNet(nn.Module):
    """LeNet written with ``nn.Sequential``: each stage bundles its own layers.

    Every convolution stage groups Conv2d + ReLU + MaxPool2d, and the first
    two fully-connected stages group Linear + ReLU, so ``forward`` is a plain
    chain of stage calls.  ``conv1`` uses padding=2, so ``fc1``'s 16*5*5
    input features correspond to 3-channel 28x28 inputs
    (28 -> 28 -> 14 -> 10 -> 5).
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 6, 5, 1, 2),  # in=3, out=6, kernel=5, stride=1, padding=2
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        self.fc1 = nn.Sequential(nn.Linear(16 * 5 * 5, 120), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(120, 84), nn.ReLU())
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return a ``(batch, 10)`` tensor of output scores for image batch ``x``."""
        x = self.conv1(x)
        x = self.conv2(x)
        # view is a Tensor method; the bare call view(...) raised NameError.
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
輸出model格式:
# Instantiate the network; printing an nn.Module lists its registered sub-layers.
model = LeNet()
print(model)
結果如下:
MNIST手寫數字訓練:
import argparse
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision as tv
import torchvision.transforms as transforms
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class LeNet(nn.Module):
    """LeNet for single-channel images: 2 conv stages + 3 linear stages.

    ``conv1`` takes 1 input channel and uses padding=2, so ``fc1``'s 16*5*5
    input features correspond to 1x28x28 inputs (28 -> 28 -> 14 -> 10 -> 5).
    The final layer emits 10 scores per sample.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, 5, 1, 2),  # in=1, out=6, kernel=5, stride=1, padding=2
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        self.fc1 = nn.Sequential(nn.Linear(16 * 5 * 5, 120), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(120, 84), nn.ReLU())
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return a ``(batch, 10)`` tensor of output scores for image batch ``x``."""
        x = self.conv1(x)
        x = self.conv2(x)
        # view is a Tensor method; the bare call view(...) raised NameError.
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
# Command-line options: where checkpoints are written, and a checkpoint path
# (--net is parsed but not read anywhere in this script).
parser = argparse.ArgumentParser()
parser.add_argument('--outf', default='./model/', help='folder to output images and model checkpoints')
parser.add_argument('--net', default='./model/net.pth', help='path to netG (to continue training)')
opt = parser.parse_args()

# Hyper-parameters.
EPOCH = 8
BATCH_SIZE = 64
LR = 0.001

# Convert the dataset images to tensors.
transform = transforms.ToTensor()

trainset = tv.datasets.MNIST(root='./data/', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)

testset = tv.datasets.MNIST(root='./data/', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)

net = LeNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9)

if __name__ == '__main__':
    # torch.save does not create missing directories, so make sure the
    # checkpoint folder exists before the first epoch finishes.
    os.makedirs(opt.outf, exist_ok=True)

    for epoch in range(EPOCH):
        # ---- training pass ----
        sum_loss = 0.0
        for i, data in enumerate(trainloader):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Report the mean loss over every 100 mini-batches.
            sum_loss += loss.item()
            if i % 100 == 99:
                print('[%d, %d] loss: %.03f' % (epoch + 1, i + 1, sum_loss / 100))
                sum_loss = 0.0

        # ---- evaluation pass on the test set (no gradients needed) ----
        with torch.no_grad():
            correct = 0
            total = 0
            for data in testloader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = net(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                # .item() keeps the running count a plain Python int rather
                # than a tensor, so the arithmetic and %d formatting below
                # work on ordinary numbers.
                correct += (predicted == labels).sum().item()
            accuracy = 100 * correct / total
            print('第%d個epoch的識別準確率為: %d%%' % (epoch + 1, accuracy))

        # NOTE(review): the original indentation was lost; the checkpoint is
        # saved once per epoch here because its filename encodes the epoch
        # number and that epoch's accuracy -- confirm this was the intent.
        torch.save(net.state_dict(), '%s/net_%03d_%d.pth' % (opt.outf, epoch + 1, accuracy))