torch-cifar10

# 小郑之家~

### 第一步，准备数据

# 1.prepare train data and test data
# data augment
# transforms on PIL image
# Training-time pipeline: augment with a random horizontal flip, then
# convert the PIL image to a tensor and normalize each channel with the
# CIFAR-10 per-channel mean/std.
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Normalize(mean, std): the length of mean/std must equal the number of image channels.
# Test-time pipeline: no augmentation — only tensor conversion and the
# same per-channel normalization used for training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# train data



torch.utils.data.DataLoader(Data, batch_size=128, shuffle=True, num_workers=2)

torch.utils.data.Dataset

torch.utils.data.Dataset

class Data(torch.utils.data.Dataset):
    """Template for a custom dataset that reads samples from *fileDir*.

    transformer: optional callable applied to each sample (e.g. a
    torchvision transform, or your own) for data augmentation; the
    built-in transforms may not fit every task, in which case write
    your own callable and pass it here.
    """

    def __init__(self, fileDir, transformer=None):
        # Record where the data lives and the per-sample transform.
        # The actual file scanning/loading is task-specific and omitted.
        self.fileDir = fileDir
        self.transformer = transformer

    def __getitem__(self, index):
        # Return the sample (and label) at *index* so the DataLoader can
        # fetch items by position; apply self.transformer if provided.
        raise NotImplementedError("load and return the sample at `index`")

    def __len__(self):
        # Return the total number of samples in the dataset.
        raise NotImplementedError("return the dataset length")



### 构建网络结构

 __init__.py

 from models import *

# 2. construct model
import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
def __init__(self):
super(LeNet, self).__init__()
self.conv1 = nn.Conv2d(3,6,5)
self.conv2 = nn.Conv2d(6,16,5)
self.fc1 = nn.Linear(16*5*5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84,10)

def forward(self, x):
out = F.relu(self.conv1(x))
out = F.max_pool2d(out, 2)
out = F.relu(self.conv2(out))
out = F.max_pool2d(out, 2)
out = out.view(out.size(0), -1)
out = F.relu(self.fc1(out))
out = F.relu(self.fc2(out))
out = self.fc3(out)
return out



torch.nn.Module

__init__

forward

_makelayer

### 准备训练

    # 3.get the network
# 3. Build the network and move it onto the training device.
net = lenet.LeNet()
# Prefer the GPU when CUDA is available, otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
net = net.to(device)
if device == 'cuda':
    # Replicate the model across all visible GPUs. cudnn.benchmark lets
    # cuDNN autotune conv algorithms — a win when input shapes are fixed,
    # but it re-searches (and slows down) if shapes change every iter.
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True
# 4. Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)



“net = torch.nn.DataParallel(net)”

• 如果网络的输入的数据维度或类型上变化不大，这样设置可以增加效率

• 如果网络的输入数据在每次iter的时候都变化的话，会导致cudnn每次都要去寻找一遍最优的配置，这样反而会降低运行效率。

 torch.cuda.is_available() 是判断有没有cuda的。

"net = torch.nn.DataParallel(net, device_ids=args.gpu)"

With `CUDA_VISIBLE_DEVICES='2,3,4,8'`, device id 0 inside the process refers to the first GPU in that list (physical GPU 2).

### 开始训练和测试

def train(epoch):
    """Run one training epoch over `trainloader`, printing loss/accuracy.

    Uses the module-level `net`, `trainloader`, `criterion`, `optimizer`
    and `device`.
    """
    print("\n Epoch: %d" % epoch)
    net.train()  # enable training-mode behavior (dropout, batchnorm updates)
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # Put the batch onto the training device.
        inputs, targets = inputs.to(device), targets.to(device)
        # Bug fix: clear gradients left over from the previous step —
        # without this, .backward() accumulates gradients across batches.
        optimizer.zero_grad()
        outputs = net(inputs)  # forward pass: predicted logits
        loss = criterion(outputs, targets)
        # Backpropagate and take an optimizer step.
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)  # class with the highest logit
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()  # count of correct predictions
    print("Train's loss: %.4f, Train's acc: %.6f" %(train_loss/(batch_idx+1), 1.0*correct/total))

def test(epoch):
    """Evaluate the network over `testloader`, printing loss/accuracy.

    Uses the module-level `net`, `testloader`, `criterion` and `device`.
    """
    global best_acc
    net.eval()  # inference-mode behavior, the counterpart of net.train()
    test_loss = 0
    correct = 0
    total = 0
    # Bug fix: disable autograd during evaluation — no gradients are
    # needed, which saves memory and compute.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    print("Test's loss: %.4f, Test's acc: %.6f" %(test_loss/(batch_idx+1), 1.0*correct/total))


“loss=criterion(outputs, targets)”

### 保存模型

 #8. decide to save model
# Persist the model whenever test accuracy improves on the best so far.
acc = 1.0 * correct / total
if acc > best_acc:
    print("Saving...")
    # Checkpoint carries the weights plus the metrics needed to resume.
    state = {
        'net': net.state_dict(),
        'acc': acc,
        'epoch': epoch,
    }
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    torch.save(state, './checkpoint/cifar.t7')
    best_acc = acc



### run

# Main loop: train then evaluate, for 200 epochs past the starting epoch.
for epoch in range(start_epoch, start_epoch + 200):
    train(epoch)
    test(epoch)