PyTorch一小時(shí)掌握之遷移學(xué)習(xí)篇
概述
遷移學(xué)習(xí) (Transfer Learning) 是把已學(xué)訓(xùn)練好的模型參數(shù)用作新訓(xùn)練模型的起始參數(shù). 遷移學(xué)習(xí)是深度學(xué)習(xí)中非常重要和常用的一個(gè)策略.

為什么使用遷移學(xué)習(xí)
更好的結(jié)果
遷移學(xué)習(xí) (Transfer Learning) 可以幫助我們得到更好的結(jié)果.
當(dāng)我們手上的數(shù)據(jù)比較少的時(shí)候, 訓(xùn)練非常容易造成過擬合的現(xiàn)象. 使用遷移學(xué)習(xí)可以幫助我們通過更少的訓(xùn)練數(shù)據(jù)達(dá)到更好的效果. 使得模型的泛化能力更強(qiáng), 訓(xùn)練過程更穩(wěn)定.

節(jié)省時(shí)間
遷移學(xué)習(xí) (Transfer Learning) 可以幫助我們節(jié)省時(shí)間.
通過遷徙學(xué)習(xí), 我們站在了巨人的肩膀上. 利用前人花大量時(shí)間訓(xùn)練好的參數(shù), 能幫助我們在模型的訓(xùn)練上節(jié)省大把的時(shí)間.

加載模型
首先我們需要加載模型, 并指定層數(shù). 常用的模型有:
- VGG
- ResNet
- SqueezeNet
- DenseNet
- Inception
- GoogLeNet
- ShuffleNet
- MobileNet
官網(wǎng) API
ResNet152
我們將使用 ResNet 152 和 CIFAR 100 來舉例.
凍層實(shí)現(xiàn)

def set_parameter_requires_grad(model, feature_extracting): """ 是否保留梯度, 實(shí)現(xiàn)凍層 :param model: 模型 :param feature_extracting: 是否凍層 :return: 無返回值 """ if feature_extracting: # 如果凍層 for param in model.parameters(): # 遍歷每個(gè)權(quán)重參數(shù) param.requires_grad = False # 保留梯度為False
模型初始化

def initialize_model(model_name, num_classes, feature_exact, use_pretrained=True): """ 初始化模型 :param model_name: 模型名字 :param num_classes: 類別數(shù) :param feature_exact: 是否凍層 :param use_pretrained: 是否下載模型 :return: 返回模型, """ model_ft = None if model_name == "resnet": """Resnet152""" # 加載模型 model_ft = models.resnet152(pretrained=use_pretrained) # 下載參數(shù) set_parameter_requires_grad(model_ft, feature_exact) # 凍層 # 修改全連接層 num_features = model_ft.fc.in_features model_ft.fc = torch.nn.Sequential( torch.nn.Linear(num_features, num_classes), torch.nn.LogSoftmax(dim=1) ) # 返回初始化好的模型 return model_ft
獲取需更新參數(shù)
def parameter_to_update(model):
"""
獲取需要更新的參數(shù)
:param model: 模型
:return: 需要更新的參數(shù)列表
"""
print("Params to learn")
param_array = model.parameters()
if feature_exact:
param_array = []
for name, param, in model.named_parameters():
if param.requires_grad == True:
param_array.append(param)
print("\t", name)
else:
for name, param, in model.named_parameters():
if param.requires_grad == True:
print("\t", name)
return param_array
訓(xùn)練模型
def train_model(model, dataloaders, citerion, optimizer, filename, num_epochs=25):
# 獲取起始時(shí)間
since = time.time()
# 初始化參數(shù)
best_acc = 0
val_acc_history = []
train_acc_history = []
train_losses = []
valid_losses = []
LRs = [optimizer.param_groups[0]["lr"]]
best_model_weights = copy.deepcopy(model.state_dict())
for epoch in range(num_epochs):
print("Epoch {}/{}".format(epoch, num_epochs - 1))
print("-" * 10)
# 訓(xùn)練和驗(yàn)證
for phase in ["train", "valid"]:
if phase == "train":
model.train() # 訓(xùn)練
else:
model.eval() # 驗(yàn)證
running_loss = 0.0
running_corrects = 0
# 遍歷數(shù)據(jù)
for inputs, labels in dataloaders[phase]:
inputs = inputs.to(device)
labels = labels.to(device)
# 梯度清零
optimizer.zero_grad()
# 只有訓(xùn)練的時(shí)候計(jì)算和更新梯度
with torch.set_grad_enabled(phase == "train"):
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
# 計(jì)算損失
loss = criterion(outputs, labels)
# 訓(xùn)練階段更新權(quán)重
if phase == "train":loss.backward()optimizer.step()
# 計(jì)算損失
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
epoch_loss = running_loss / len(dataloaders[phase].dataset)
epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
time_eplased = time.time() - since
print("Time elapsed {:.0f}m {:.0f}s".format(time_eplased // 60, time_eplased % 60))
print("{} Loss: {:.4f} Acc: {:.4f}".format(phase, epoch_loss, epoch_acc))
# 得到最好的模型
if phase == "valid" and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_weights = copy.deepcopy(model.state_dict())
state = {
"state_dict": model.state_dict(),
"best_acc": best_acc,
"optimizer": optimizer.state_dict(),
}
torch.save(state, filename)
if phase == "valid":
val_acc_history.append(epoch_acc)
valid_losses.append(epoch_loss)
scheduler.step(epoch_loss)
if phase == "train":
train_acc_history.append(epoch_acc)
train_losses.append(epoch_loss)
print("Optimizer learning rate: {:.7f}".format(optimizer.param_groups[0]["lr"]))
LRs.append(optimizer.param_groups[0]["lr"])
print()
time_eplased = time.time() - since
print("Training complete in {:.0f}m {:.0f}s".format(time_eplased // 60, time_eplased % 60))
print("Best val Acc: {:4f}".format(best_acc))
# 訓(xùn)練完后用最好的一次當(dāng)做模型最終的結(jié)果
model.load_state_dict(best_model_weights)
# 返回
return model, val_acc_history, train_acc_history, valid_losses, train_losses, LRs
獲取數(shù)據(jù)
def get_data():
"""獲取數(shù)據(jù)"""
# 獲取測試集
train = torchvision.datasets.CIFAR100(root="./mnt", train=True, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.ToTensor(), # 轉(zhuǎn)換成張量
torchvision.transforms.Normalize((0.1307,), (0.3081,)) # 標(biāo)準(zhǔn)化
]))
train_loader = DataLoader(train, batch_size=batch_size) # 分割測試集
# 獲取測試集
test = torchvision.datasets.CIFAR100(root="./mnt", train=False, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.ToTensor(), # 轉(zhuǎn)換成張量
torchvision.transforms.Normalize((0.1307,), (0.3081,)) # 標(biāo)準(zhǔn)化
]))
test_loader = DataLoader(test, batch_size=batch_size) # 分割訓(xùn)練
data_loader = {"train": train_loader, "valid": test_loader}
# 返回分割好的訓(xùn)練集和測試集
return data_loader
完整代碼

完整代碼:
import copy
import torch
from torch.utils.data import DataLoader
import time
from torchsummary import summary
import torchvision
import torchvision.models as models
def set_parameter_requires_grad(model, feature_extracting):
"""
是否保留梯度, 實(shí)現(xiàn)凍層
:param model: 模型
:param feature_extracting: 是否凍層
:return: 無返回值
"""
if feature_extracting: # 如果凍層
for param in model.parameters(): # 遍歷每個(gè)權(quán)重參數(shù)
param.requires_grad = False # 保留梯度為False
def initialize_model(model_name, num_classes, feature_exact, use_pretrained=True):
"""
初始化模型
:param model_name: 模型名字
:param num_classes: 類別數(shù)
:param feature_exact: 是否凍層
:param use_pretrained: 是否下載模型
:return: 返回模型,
"""
model_ft = None
if model_name == "resnet":
"""Resnet152"""
# 加載模型
model_ft = models.resnet152(pretrained=use_pretrained) # 下載參數(shù)
set_parameter_requires_grad(model_ft, feature_exact) # 凍層
# 修改全連接層
num_features = model_ft.fc.in_features
model_ft.fc = torch.nn.Sequential(
torch.nn.Linear(num_features, num_classes),
torch.nn.LogSoftmax(dim=1)
)
# 返回初始化好的模型
return model_ft
def parameter_to_update(model):
"""
獲取需要更新的參數(shù)
:param model: 模型
:return: 需要更新的參數(shù)列表
"""
print("Params to learn")
param_array = model.parameters()
if feature_exact:
param_array = []
for name, param, in model.named_parameters():
if param.requires_grad == True:
param_array.append(param)
print("\t", name)
else:
for name, param, in model.named_parameters():
if param.requires_grad == True:
print("\t", name)
return param_array
def train_model(model, dataloaders, citerion, optimizer, filename, num_epochs=25):
# 獲取起始時(shí)間
since = time.time()
# 初始化參數(shù)
best_acc = 0
val_acc_history = []
train_acc_history = []
train_losses = []
valid_losses = []
LRs = [optimizer.param_groups[0]["lr"]]
best_model_weights = copy.deepcopy(model.state_dict())
for epoch in range(num_epochs):
print("Epoch {}/{}".format(epoch, num_epochs - 1))
print("-" * 10)
# 訓(xùn)練和驗(yàn)證
for phase in ["train", "valid"]:
if phase == "train":
model.train() # 訓(xùn)練
else:
model.eval() # 驗(yàn)證
running_loss = 0.0
running_corrects = 0
# 遍歷數(shù)據(jù)
for inputs, labels in dataloaders[phase]:
inputs = inputs.to(device)
labels = labels.to(device)
# 梯度清零
optimizer.zero_grad()
# 只有訓(xùn)練的時(shí)候計(jì)算和更新梯度
with torch.set_grad_enabled(phase == "train"):
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
# 計(jì)算損失
loss = criterion(outputs, labels)
# 訓(xùn)練階段更新權(quán)重
if phase == "train":loss.backward()optimizer.step()
# 計(jì)算損失
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
epoch_loss = running_loss / len(dataloaders[phase].dataset)
epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
time_eplased = time.time() - since
print("Time elapsed {:.0f}m {:.0f}s".format(time_eplased // 60, time_eplased % 60))
print("{} Loss: {:.4f} Acc: {:.4f}".format(phase, epoch_loss, epoch_acc))
# 得到最好的模型
if phase == "valid" and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_weights = copy.deepcopy(model.state_dict())
state = {
"state_dict": model.state_dict(),
"best_acc": best_acc,
"optimizer": optimizer.state_dict(),
}
torch.save(state, filename)
if phase == "valid":
val_acc_history.append(epoch_acc)
valid_losses.append(epoch_loss)
scheduler.step(epoch_loss)
if phase == "train":
train_acc_history.append(epoch_acc)
train_losses.append(epoch_loss)
print("Optimizer learning rate: {:.7f}".format(optimizer.param_groups[0]["lr"]))
LRs.append(optimizer.param_groups[0]["lr"])
print()
time_eplased = time.time() - since
print("Training complete in {:.0f}m {:.0f}s".format(time_eplased // 60, time_eplased % 60))
print("Best val Acc: {:4f}".format(best_acc))
# 訓(xùn)練完后用最好的一次當(dāng)做模型最終的結(jié)果
model.load_state_dict(best_model_weights)
# 返回
return model, val_acc_history, train_acc_history, valid_losses, train_losses, LRs
def get_data():
"""獲取數(shù)據(jù)"""
# 獲取測試集
train = torchvision.datasets.CIFAR100(root="./mnt", train=True, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.ToTensor(), # 轉(zhuǎn)換成張量
torchvision.transforms.Normalize((0.1307,), (0.3081,)) # 標(biāo)準(zhǔn)化
]))
train_loader = DataLoader(train, batch_size=batch_size) # 分割測試集
# 獲取測試集
test = torchvision.datasets.CIFAR100(root="./mnt", train=False, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.ToTensor(), # 轉(zhuǎn)換成張量
torchvision.transforms.Normalize((0.1307,), (0.3081,)) # 標(biāo)準(zhǔn)化
]))
test_loader = DataLoader(test, batch_size=batch_size) # 分割訓(xùn)練
data_loader = {"train": train_loader, "valid": test_loader}
# 返回分割好的訓(xùn)練集和測試集
return data_loader
# 超參數(shù)
filename = "checkpoint.pth" # 模型保存
feature_exact = True # 凍層
num_classes = 100 # 輸出的類別數(shù)
batch_size = 1024 # 一次訓(xùn)練的樣本數(shù)目
iteration_num = 10 # 迭代次數(shù)
# 獲取模型
resnet152 = initialize_model(
model_name="resnet",
num_classes=num_classes,
feature_exact=feature_exact,
use_pretrained=True
)
# 是否使用GPU訓(xùn)練
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
if use_cuda: resnet152.cuda() # GPU 計(jì)算
print("是否使用 GPU 加速:", use_cuda)
# 輸出網(wǎng)絡(luò)結(jié)構(gòu)
print(summary(resnet152, (3, 32, 32)))
# 訓(xùn)練參數(shù)
params_to_update = parameter_to_update(resnet152)
# 優(yōu)化器
optimizer = torch.optim.Adam(params_to_update, lr=0.01)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1) # 學(xué)習(xí)率每10個(gè)epoch衰減到原來的1/10
criterion = torch.nn.NLLLoss()
if __name__ == "__main__":
data_loader = get_data()
resnet152, val_acc_history, train_acc_history, valid_losses, train_losses, LRs = train_model(
model=resnet152,
dataloaders=data_loader,
citerion=criterion,
optimizer=optimizer,
num_epochs=iteration_num,
filename=filename
)
輸出結(jié)果:
是否使用 GPU 加速: True
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 16, 16] 9,408
BatchNorm2d-2 [-1, 64, 16, 16] 128
ReLU-3 [-1, 64, 16, 16] 0
MaxPool2d-4 [-1, 64, 8, 8] 0
Conv2d-5 [-1, 64, 8, 8] 4,096
BatchNorm2d-6 [-1, 64, 8, 8] 128
ReLU-7 [-1, 64, 8, 8] 0
Conv2d-8 [-1, 64, 8, 8] 36,864
BatchNorm2d-9 [-1, 64, 8, 8] 128
ReLU-10 [-1, 64, 8, 8] 0
Conv2d-11 [-1, 256, 8, 8] 16,384
BatchNorm2d-12 [-1, 256, 8, 8] 512
Conv2d-13 [-1, 256, 8, 8] 16,384
BatchNorm2d-14 [-1, 256, 8, 8] 512
ReLU-15 [-1, 256, 8, 8] 0
Bottleneck-16 [-1, 256, 8, 8] 0
Conv2d-17 [-1, 64, 8, 8] 16,384
BatchNorm2d-18 [-1, 64, 8, 8] 128
ReLU-19 [-1, 64, 8, 8] 0
Conv2d-20 [-1, 64, 8, 8] 36,864
BatchNorm2d-21 [-1, 64, 8, 8] 128
ReLU-22 [-1, 64, 8, 8] 0
Conv2d-23 [-1, 256, 8, 8] 16,384
BatchNorm2d-24 [-1, 256, 8, 8] 512
ReLU-25 [-1, 256, 8, 8] 0
Bottleneck-26 [-1, 256, 8, 8] 0
Conv2d-27 [-1, 64, 8, 8] 16,384
BatchNorm2d-28 [-1, 64, 8, 8] 128
ReLU-29 [-1, 64, 8, 8] 0
Conv2d-30 [-1, 64, 8, 8] 36,864
BatchNorm2d-31 [-1, 64, 8, 8] 128
ReLU-32 [-1, 64, 8, 8] 0
Conv2d-33 [-1, 256, 8, 8] 16,384
BatchNorm2d-34 [-1, 256, 8, 8] 512
ReLU-35 [-1, 256, 8, 8] 0
Bottleneck-36 [-1, 256, 8, 8] 0
Conv2d-37 [-1, 128, 8, 8] 32,768
BatchNorm2d-38 [-1, 128, 8, 8] 256
ReLU-39 [-1, 128, 8, 8] 0
Conv2d-40 [-1, 128, 4, 4] 147,456
BatchNorm2d-41 [-1, 128, 4, 4] 256
ReLU-42 [-1, 128, 4, 4] 0
Conv2d-43 [-1, 512, 4, 4] 65,536
BatchNorm2d-44 [-1, 512, 4, 4] 1,024
Conv2d-45 [-1, 512, 4, 4] 131,072
BatchNorm2d-46 [-1, 512, 4, 4] 1,024
ReLU-47 [-1, 512, 4, 4] 0
Bottleneck-48 [-1, 512, 4, 4] 0
Conv2d-49 [-1, 128, 4, 4] 65,536
BatchNorm2d-50 [-1, 128, 4, 4] 256
ReLU-51 [-1, 128, 4, 4] 0
Conv2d-52 [-1, 128, 4, 4] 147,456
BatchNorm2d-53 [-1, 128, 4, 4] 256
ReLU-54 [-1, 128, 4, 4] 0
Conv2d-55 [-1, 512, 4, 4] 65,536
BatchNorm2d-56 [-1, 512, 4, 4] 1,024
ReLU-57 [-1, 512, 4, 4] 0
Bottleneck-58 [-1, 512, 4, 4] 0
Conv2d-59 [-1, 128, 4, 4] 65,536
BatchNorm2d-60 [-1, 128, 4, 4] 256
ReLU-61 [-1, 128, 4, 4] 0
Conv2d-62 [-1, 128, 4, 4] 147,456
BatchNorm2d-63 [-1, 128, 4, 4] 256
ReLU-64 [-1, 128, 4, 4] 0
Conv2d-65 [-1, 512, 4, 4] 65,536
BatchNorm2d-66 [-1, 512, 4, 4] 1,024
ReLU-67 [-1, 512, 4, 4] 0
Bottleneck-68 [-1, 512, 4, 4] 0
Conv2d-69 [-1, 128, 4, 4] 65,536
BatchNorm2d-70 [-1, 128, 4, 4] 256
ReLU-71 [-1, 128, 4, 4] 0
Conv2d-72 [-1, 128, 4, 4] 147,456
BatchNorm2d-73 [-1, 128, 4, 4] 256
ReLU-74 [-1, 128, 4, 4] 0
Conv2d-75 [-1, 512, 4, 4] 65,536
BatchNorm2d-76 [-1, 512, 4, 4] 1,024
ReLU-77 [-1, 512, 4, 4] 0
Bottleneck-78 [-1, 512, 4, 4] 0
Conv2d-79 [-1, 128, 4, 4] 65,536
BatchNorm2d-80 [-1, 128, 4, 4] 256
ReLU-81 [-1, 128, 4, 4] 0
Conv2d-82 [-1, 128, 4, 4] 147,456
BatchNorm2d-83 [-1, 128, 4, 4] 256
ReLU-84 [-1, 128, 4, 4] 0
Conv2d-85 [-1, 512, 4, 4] 65,536
BatchNorm2d-86 [-1, 512, 4, 4] 1,024
ReLU-87 [-1, 512, 4, 4] 0
Bottleneck-88 [-1, 512, 4, 4] 0
Conv2d-89 [-1, 128, 4, 4] 65,536
BatchNorm2d-90 [-1, 128, 4, 4] 256
ReLU-91 [-1, 128, 4, 4] 0
Conv2d-92 [-1, 128, 4, 4] 147,456
BatchNorm2d-93 [-1, 128, 4, 4] 256
ReLU-94 [-1, 128, 4, 4] 0
Conv2d-95 [-1, 512, 4, 4] 65,536
BatchNorm2d-96 [-1, 512, 4, 4] 1,024
ReLU-97 [-1, 512, 4, 4] 0
Bottleneck-98 [-1, 512, 4, 4] 0
Conv2d-99 [-1, 128, 4, 4] 65,536
BatchNorm2d-100 [-1, 128, 4, 4] 256
ReLU-101 [-1, 128, 4, 4] 0
Conv2d-102 [-1, 128, 4, 4] 147,456
BatchNorm2d-103 [-1, 128, 4, 4] 256
ReLU-104 [-1, 128, 4, 4] 0
Conv2d-105 [-1, 512, 4, 4] 65,536
BatchNorm2d-106 [-1, 512, 4, 4] 1,024
ReLU-107 [-1, 512, 4, 4] 0
Bottleneck-108 [-1, 512, 4, 4] 0
Conv2d-109 [-1, 128, 4, 4] 65,536
BatchNorm2d-110 [-1, 128, 4, 4] 256
ReLU-111 [-1, 128, 4, 4] 0
Conv2d-112 [-1, 128, 4, 4] 147,456
BatchNorm2d-113 [-1, 128, 4, 4] 256
ReLU-114 [-1, 128, 4, 4] 0
Conv2d-115 [-1, 512, 4, 4] 65,536
BatchNorm2d-116 [-1, 512, 4, 4] 1,024
ReLU-117 [-1, 512, 4, 4] 0
Bottleneck-118 [-1, 512, 4, 4] 0
Conv2d-119 [-1, 256, 4, 4] 131,072
BatchNorm2d-120 [-1, 256, 4, 4] 512
ReLU-121 [-1, 256, 4, 4] 0
Conv2d-122 [-1, 256, 2, 2] 589,824
BatchNorm2d-123 [-1, 256, 2, 2] 512
ReLU-124 [-1, 256, 2, 2] 0
Conv2d-125 [-1, 1024, 2, 2] 262,144
BatchNorm2d-126 [-1, 1024, 2, 2] 2,048
Conv2d-127 [-1, 1024, 2, 2] 524,288
BatchNorm2d-128 [-1, 1024, 2, 2] 2,048
ReLU-129 [-1, 1024, 2, 2] 0
Bottleneck-130 [-1, 1024, 2, 2] 0
Conv2d-131 [-1, 256, 2, 2] 262,144
BatchNorm2d-132 [-1, 256, 2, 2] 512
ReLU-133 [-1, 256, 2, 2] 0
Conv2d-134 [-1, 256, 2, 2] 589,824
BatchNorm2d-135 [-1, 256, 2, 2] 512
ReLU-136 [-1, 256, 2, 2] 0
Conv2d-137 [-1, 1024, 2, 2] 262,144
BatchNorm2d-138 [-1, 1024, 2, 2] 2,048
ReLU-139 [-1, 1024, 2, 2] 0
Bottleneck-140 [-1, 1024, 2, 2] 0
Conv2d-141 [-1, 256, 2, 2] 262,144
BatchNorm2d-142 [-1, 256, 2, 2] 512
ReLU-143 [-1, 256, 2, 2] 0
Conv2d-144 [-1, 256, 2, 2] 589,824
BatchNorm2d-145 [-1, 256, 2, 2] 512
ReLU-146 [-1, 256, 2, 2] 0
Conv2d-147 [-1, 1024, 2, 2] 262,144
BatchNorm2d-148 [-1, 1024, 2, 2] 2,048
ReLU-149 [-1, 1024, 2, 2] 0
Bottleneck-150 [-1, 1024, 2, 2] 0
Conv2d-151 [-1, 256, 2, 2] 262,144
BatchNorm2d-152 [-1, 256, 2, 2] 512
ReLU-153 [-1, 256, 2, 2] 0
Conv2d-154 [-1, 256, 2, 2] 589,824
BatchNorm2d-155 [-1, 256, 2, 2] 512
ReLU-156 [-1, 256, 2, 2] 0
Conv2d-157 [-1, 1024, 2, 2] 262,144
BatchNorm2d-158 [-1, 1024, 2, 2] 2,048
ReLU-159 [-1, 1024, 2, 2] 0
Bottleneck-160 [-1, 1024, 2, 2] 0
Conv2d-161 [-1, 256, 2, 2] 262,144
BatchNorm2d-162 [-1, 256, 2, 2] 512
ReLU-163 [-1, 256, 2, 2] 0
Conv2d-164 [-1, 256, 2, 2] 589,824
BatchNorm2d-165 [-1, 256, 2, 2] 512
ReLU-166 [-1, 256, 2, 2] 0
Conv2d-167 [-1, 1024, 2, 2] 262,144
BatchNorm2d-168 [-1, 1024, 2, 2] 2,048
ReLU-169 [-1, 1024, 2, 2] 0
Bottleneck-170 [-1, 1024, 2, 2] 0
Conv2d-171 [-1, 256, 2, 2] 262,144
BatchNorm2d-172 [-1, 256, 2, 2] 512
ReLU-173 [-1, 256, 2, 2] 0
Conv2d-174 [-1, 256, 2, 2] 589,824
BatchNorm2d-175 [-1, 256, 2, 2] 512
ReLU-176 [-1, 256, 2, 2] 0
Conv2d-177 [-1, 1024, 2, 2] 262,144
BatchNorm2d-178 [-1, 1024, 2, 2] 2,048
ReLU-179 [-1, 1024, 2, 2] 0
Bottleneck-180 [-1, 1024, 2, 2] 0
Conv2d-181 [-1, 256, 2, 2] 262,144
BatchNorm2d-182 [-1, 256, 2, 2] 512
ReLU-183 [-1, 256, 2, 2] 0
Conv2d-184 [-1, 256, 2, 2] 589,824
BatchNorm2d-185 [-1, 256, 2, 2] 512
ReLU-186 [-1, 256, 2, 2] 0
Conv2d-187 [-1, 1024, 2, 2] 262,144
BatchNorm2d-188 [-1, 1024, 2, 2] 2,048
ReLU-189 [-1, 1024, 2, 2] 0
Bottleneck-190 [-1, 1024, 2, 2] 0
Conv2d-191 [-1, 256, 2, 2] 262,144
BatchNorm2d-192 [-1, 256, 2, 2] 512
ReLU-193 [-1, 256, 2, 2] 0
Conv2d-194 [-1, 256, 2, 2] 589,824
BatchNorm2d-195 [-1, 256, 2, 2] 512
ReLU-196 [-1, 256, 2, 2] 0
Conv2d-197 [-1, 1024, 2, 2] 262,144
BatchNorm2d-198 [-1, 1024, 2, 2] 2,048
ReLU-199 [-1, 1024, 2, 2] 0
Bottleneck-200 [-1, 1024, 2, 2] 0
Conv2d-201 [-1, 256, 2, 2] 262,144
BatchNorm2d-202 [-1, 256, 2, 2] 512
ReLU-203 [-1, 256, 2, 2] 0
Conv2d-204 [-1, 256, 2, 2] 589,824
BatchNorm2d-205 [-1, 256, 2, 2] 512
ReLU-206 [-1, 256, 2, 2] 0
Conv2d-207 [-1, 1024, 2, 2] 262,144
BatchNorm2d-208 [-1, 1024, 2, 2] 2,048
ReLU-209 [-1, 1024, 2, 2] 0
Bottleneck-210 [-1, 1024, 2, 2] 0
Conv2d-211 [-1, 256, 2, 2] 262,144
BatchNorm2d-212 [-1, 256, 2, 2] 512
ReLU-213 [-1, 256, 2, 2] 0
Conv2d-214 [-1, 256, 2, 2] 589,824
BatchNorm2d-215 [-1, 256, 2, 2] 512
ReLU-216 [-1, 256, 2, 2] 0
Conv2d-217 [-1, 1024, 2, 2] 262,144
BatchNorm2d-218 [-1, 1024, 2, 2] 2,048
ReLU-219 [-1, 1024, 2, 2] 0
Bottleneck-220 [-1, 1024, 2, 2] 0
Conv2d-221 [-1, 256, 2, 2] 262,144
BatchNorm2d-222 [-1, 256, 2, 2] 512
ReLU-223 [-1, 256, 2, 2] 0
Conv2d-224 [-1, 256, 2, 2] 589,824
BatchNorm2d-225 [-1, 256, 2, 2] 512
ReLU-226 [-1, 256, 2, 2] 0
Conv2d-227 [-1, 1024, 2, 2] 262,144
BatchNorm2d-228 [-1, 1024, 2, 2] 2,048
ReLU-229 [-1, 1024, 2, 2] 0
Bottleneck-230 [-1, 1024, 2, 2] 0
Conv2d-231 [-1, 256, 2, 2] 262,144
BatchNorm2d-232 [-1, 256, 2, 2] 512
ReLU-233 [-1, 256, 2, 2] 0
Conv2d-234 [-1, 256, 2, 2] 589,824
BatchNorm2d-235 [-1, 256, 2, 2] 512
ReLU-236 [-1, 256, 2, 2] 0
Conv2d-237 [-1, 1024, 2, 2] 262,144
BatchNorm2d-238 [-1, 1024, 2, 2] 2,048
ReLU-239 [-1, 1024, 2, 2] 0
Bottleneck-240 [-1, 1024, 2, 2] 0
Conv2d-241 [-1, 256, 2, 2] 262,144
BatchNorm2d-242 [-1, 256, 2, 2] 512
ReLU-243 [-1, 256, 2, 2] 0
Conv2d-244 [-1, 256, 2, 2] 589,824
BatchNorm2d-245 [-1, 256, 2, 2] 512
ReLU-246 [-1, 256, 2, 2] 0
Conv2d-247 [-1, 1024, 2, 2] 262,144
BatchNorm2d-248 [-1, 1024, 2, 2] 2,048
ReLU-249 [-1, 1024, 2, 2] 0
Bottleneck-250 [-1, 1024, 2, 2] 0
Conv2d-251 [-1, 256, 2, 2] 262,144
BatchNorm2d-252 [-1, 256, 2, 2] 512
ReLU-253 [-1, 256, 2, 2] 0
Conv2d-254 [-1, 256, 2, 2] 589,824
BatchNorm2d-255 [-1, 256, 2, 2] 512
ReLU-256 [-1, 256, 2, 2] 0
Conv2d-257 [-1, 1024, 2, 2] 262,144
BatchNorm2d-258 [-1, 1024, 2, 2] 2,048
ReLU-259 [-1, 1024, 2, 2] 0
Bottleneck-260 [-1, 1024, 2, 2] 0
Conv2d-261 [-1, 256, 2, 2] 262,144
BatchNorm2d-262 [-1, 256, 2, 2] 512
ReLU-263 [-1, 256, 2, 2] 0
Conv2d-264 [-1, 256, 2, 2] 589,824
BatchNorm2d-265 [-1, 256, 2, 2] 512
ReLU-266 [-1, 256, 2, 2] 0
Conv2d-267 [-1, 1024, 2, 2] 262,144
BatchNorm2d-268 [-1, 1024, 2, 2] 2,048
ReLU-269 [-1, 1024, 2, 2] 0
Bottleneck-270 [-1, 1024, 2, 2] 0
Conv2d-271 [-1, 256, 2, 2] 262,144
BatchNorm2d-272 [-1, 256, 2, 2] 512
ReLU-273 [-1, 256, 2, 2] 0
Conv2d-274 [-1, 256, 2, 2] 589,824
BatchNorm2d-275 [-1, 256, 2, 2] 512
ReLU-276 [-1, 256, 2, 2] 0
Conv2d-277 [-1, 1024, 2, 2] 262,144
BatchNorm2d-278 [-1, 1024, 2, 2] 2,048
ReLU-279 [-1, 1024, 2, 2] 0
Bottleneck-280 [-1, 1024, 2, 2] 0
Conv2d-281 [-1, 256, 2, 2] 262,144
BatchNorm2d-282 [-1, 256, 2, 2] 512
ReLU-283 [-1, 256, 2, 2] 0
Conv2d-284 [-1, 256, 2, 2] 589,824
BatchNorm2d-285 [-1, 256, 2, 2] 512
ReLU-286 [-1, 256, 2, 2] 0
Conv2d-287 [-1, 1024, 2, 2] 262,144
BatchNorm2d-288 [-1, 1024, 2, 2] 2,048
ReLU-289 [-1, 1024, 2, 2] 0
Bottleneck-290 [-1, 1024, 2, 2] 0
Conv2d-291 [-1, 256, 2, 2] 262,144
BatchNorm2d-292 [-1, 256, 2, 2] 512
ReLU-293 [-1, 256, 2, 2] 0
Conv2d-294 [-1, 256, 2, 2] 589,824
BatchNorm2d-295 [-1, 256, 2, 2] 512
ReLU-296 [-1, 256, 2, 2] 0
Conv2d-297 [-1, 1024, 2, 2] 262,144
BatchNorm2d-298 [-1, 1024, 2, 2] 2,048
ReLU-299 [-1, 1024, 2, 2] 0
Bottleneck-300 [-1, 1024, 2, 2] 0
Conv2d-301 [-1, 256, 2, 2] 262,144
BatchNorm2d-302 [-1, 256, 2, 2] 512
ReLU-303 [-1, 256, 2, 2] 0
Conv2d-304 [-1, 256, 2, 2] 589,824
BatchNorm2d-305 [-1, 256, 2, 2] 512
ReLU-306 [-1, 256, 2, 2] 0
Conv2d-307 [-1, 1024, 2, 2] 262,144
BatchNorm2d-308 [-1, 1024, 2, 2] 2,048
ReLU-309 [-1, 1024, 2, 2] 0
Bottleneck-310 [-1, 1024, 2, 2] 0
Conv2d-311 [-1, 256, 2, 2] 262,144
BatchNorm2d-312 [-1, 256, 2, 2] 512
ReLU-313 [-1, 256, 2, 2] 0
Conv2d-314 [-1, 256, 2, 2] 589,824
BatchNorm2d-315 [-1, 256, 2, 2] 512
ReLU-316 [-1, 256, 2, 2] 0
Conv2d-317 [-1, 1024, 2, 2] 262,144
BatchNorm2d-318 [-1, 1024, 2, 2] 2,048
ReLU-319 [-1, 1024, 2, 2] 0
Bottleneck-320 [-1, 1024, 2, 2] 0
Conv2d-321 [-1, 256, 2, 2] 262,144
BatchNorm2d-322 [-1, 256, 2, 2] 512
ReLU-323 [-1, 256, 2, 2] 0
Conv2d-324 [-1, 256, 2, 2] 589,824
BatchNorm2d-325 [-1, 256, 2, 2] 512
ReLU-326 [-1, 256, 2, 2] 0
Conv2d-327 [-1, 1024, 2, 2] 262,144
BatchNorm2d-328 [-1, 1024, 2, 2] 2,048
ReLU-329 [-1, 1024, 2, 2] 0
Bottleneck-330 [-1, 1024, 2, 2] 0
Conv2d-331 [-1, 256, 2, 2] 262,144
BatchNorm2d-332 [-1, 256, 2, 2] 512
ReLU-333 [-1, 256, 2, 2] 0
Conv2d-334 [-1, 256, 2, 2] 589,824
BatchNorm2d-335 [-1, 256, 2, 2] 512
ReLU-336 [-1, 256, 2, 2] 0
Conv2d-337 [-1, 1024, 2, 2] 262,144
BatchNorm2d-338 [-1, 1024, 2, 2] 2,048
ReLU-339 [-1, 1024, 2, 2] 0
Bottleneck-340 [-1, 1024, 2, 2] 0
Conv2d-341 [-1, 256, 2, 2] 262,144
BatchNorm2d-342 [-1, 256, 2, 2] 512
ReLU-343 [-1, 256, 2, 2] 0
Conv2d-344 [-1, 256, 2, 2] 589,824
BatchNorm2d-345 [-1, 256, 2, 2] 512
ReLU-346 [-1, 256, 2, 2] 0
Conv2d-347 [-1, 1024, 2, 2] 262,144
BatchNorm2d-348 [-1, 1024, 2, 2] 2,048
ReLU-349 [-1, 1024, 2, 2] 0
Bottleneck-350 [-1, 1024, 2, 2] 0
Conv2d-351 [-1, 256, 2, 2] 262,144
BatchNorm2d-352 [-1, 256, 2, 2] 512
ReLU-353 [-1, 256, 2, 2] 0
Conv2d-354 [-1, 256, 2, 2] 589,824
BatchNorm2d-355 [-1, 256, 2, 2] 512
ReLU-356 [-1, 256, 2, 2] 0
Conv2d-357 [-1, 1024, 2, 2] 262,144
BatchNorm2d-358 [-1, 1024, 2, 2] 2,048
ReLU-359 [-1, 1024, 2, 2] 0
Bottleneck-360 [-1, 1024, 2, 2] 0
Conv2d-361 [-1, 256, 2, 2] 262,144
BatchNorm2d-362 [-1, 256, 2, 2] 512
ReLU-363 [-1, 256, 2, 2] 0
Conv2d-364 [-1, 256, 2, 2] 589,824
BatchNorm2d-365 [-1, 256, 2, 2] 512
ReLU-366 [-1, 256, 2, 2] 0
Conv2d-367 [-1, 1024, 2, 2] 262,144
BatchNorm2d-368 [-1, 1024, 2, 2] 2,048
ReLU-369 [-1, 1024, 2, 2] 0
Bottleneck-370 [-1, 1024, 2, 2] 0
Conv2d-371 [-1, 256, 2, 2] 262,144
BatchNorm2d-372 [-1, 256, 2, 2] 512
ReLU-373 [-1, 256, 2, 2] 0
Conv2d-374 [-1, 256, 2, 2] 589,824
BatchNorm2d-375 [-1, 256, 2, 2] 512
ReLU-376 [-1, 256, 2, 2] 0
Conv2d-377 [-1, 1024, 2, 2] 262,144
BatchNorm2d-378 [-1, 1024, 2, 2] 2,048
ReLU-379 [-1, 1024, 2, 2] 0
Bottleneck-380 [-1, 1024, 2, 2] 0
Conv2d-381 [-1, 256, 2, 2] 262,144
BatchNorm2d-382 [-1, 256, 2, 2] 512
ReLU-383 [-1, 256, 2, 2] 0
Conv2d-384 [-1, 256, 2, 2] 589,824
BatchNorm2d-385 [-1, 256, 2, 2] 512
ReLU-386 [-1, 256, 2, 2] 0
Conv2d-387 [-1, 1024, 2, 2] 262,144
BatchNorm2d-388 [-1, 1024, 2, 2] 2,048
ReLU-389 [-1, 1024, 2, 2] 0
Bottleneck-390 [-1, 1024, 2, 2] 0
Conv2d-391 [-1, 256, 2, 2] 262,144
BatchNorm2d-392 [-1, 256, 2, 2] 512
ReLU-393 [-1, 256, 2, 2] 0
Conv2d-394 [-1, 256, 2, 2] 589,824
BatchNorm2d-395 [-1, 256, 2, 2] 512
ReLU-396 [-1, 256, 2, 2] 0
Conv2d-397 [-1, 1024, 2, 2] 262,144
BatchNorm2d-398 [-1, 1024, 2, 2] 2,048
ReLU-399 [-1, 1024, 2, 2] 0
Bottleneck-400 [-1, 1024, 2, 2] 0
Conv2d-401 [-1, 256, 2, 2] 262,144
BatchNorm2d-402 [-1, 256, 2, 2] 512
ReLU-403 [-1, 256, 2, 2] 0
Conv2d-404 [-1, 256, 2, 2] 589,824
BatchNorm2d-405 [-1, 256, 2, 2] 512
ReLU-406 [-1, 256, 2, 2] 0
Conv2d-407 [-1, 1024, 2, 2] 262,144
BatchNorm2d-408 [-1, 1024, 2, 2] 2,048
ReLU-409 [-1, 1024, 2, 2] 0
Bottleneck-410 [-1, 1024, 2, 2] 0
Conv2d-411 [-1, 256, 2, 2] 262,144
BatchNorm2d-412 [-1, 256, 2, 2] 512
ReLU-413 [-1, 256, 2, 2] 0
Conv2d-414 [-1, 256, 2, 2] 589,824
BatchNorm2d-415 [-1, 256, 2, 2] 512
ReLU-416 [-1, 256, 2, 2] 0
Conv2d-417 [-1, 1024, 2, 2] 262,144
BatchNorm2d-418 [-1, 1024, 2, 2] 2,048
ReLU-419 [-1, 1024, 2, 2] 0
Bottleneck-420 [-1, 1024, 2, 2] 0
Conv2d-421 [-1, 256, 2, 2] 262,144
BatchNorm2d-422 [-1, 256, 2, 2] 512
ReLU-423 [-1, 256, 2, 2] 0
Conv2d-424 [-1, 256, 2, 2] 589,824
BatchNorm2d-425 [-1, 256, 2, 2] 512
ReLU-426 [-1, 256, 2, 2] 0
Conv2d-427 [-1, 1024, 2, 2] 262,144
BatchNorm2d-428 [-1, 1024, 2, 2] 2,048
ReLU-429 [-1, 1024, 2, 2] 0
Bottleneck-430 [-1, 1024, 2, 2] 0
Conv2d-431 [-1, 256, 2, 2] 262,144
BatchNorm2d-432 [-1, 256, 2, 2] 512
ReLU-433 [-1, 256, 2, 2] 0
Conv2d-434 [-1, 256, 2, 2] 589,824
BatchNorm2d-435 [-1, 256, 2, 2] 512
ReLU-436 [-1, 256, 2, 2] 0
Conv2d-437 [-1, 1024, 2, 2] 262,144
BatchNorm2d-438 [-1, 1024, 2, 2] 2,048
ReLU-439 [-1, 1024, 2, 2] 0
Bottleneck-440 [-1, 1024, 2, 2] 0
Conv2d-441 [-1, 256, 2, 2] 262,144
BatchNorm2d-442 [-1, 256, 2, 2] 512
ReLU-443 [-1, 256, 2, 2] 0
Conv2d-444 [-1, 256, 2, 2] 589,824
BatchNorm2d-445 [-1, 256, 2, 2] 512
ReLU-446 [-1, 256, 2, 2] 0
Conv2d-447 [-1, 1024, 2, 2] 262,144
BatchNorm2d-448 [-1, 1024, 2, 2] 2,048
ReLU-449 [-1, 1024, 2, 2] 0
Bottleneck-450 [-1, 1024, 2, 2] 0
Conv2d-451 [-1, 256, 2, 2] 262,144
BatchNorm2d-452 [-1, 256, 2, 2] 512
ReLU-453 [-1, 256, 2, 2] 0
Conv2d-454 [-1, 256, 2, 2] 589,824
BatchNorm2d-455 [-1, 256, 2, 2] 512
ReLU-456 [-1, 256, 2, 2] 0
Conv2d-457 [-1, 1024, 2, 2] 262,144
BatchNorm2d-458 [-1, 1024, 2, 2] 2,048
ReLU-459 [-1, 1024, 2, 2] 0
Bottleneck-460 [-1, 1024, 2, 2] 0
Conv2d-461 [-1, 256, 2, 2] 262,144
BatchNorm2d-462 [-1, 256, 2, 2] 512
ReLU-463 [-1, 256, 2, 2] 0
Conv2d-464 [-1, 256, 2, 2] 589,824
BatchNorm2d-465 [-1, 256, 2, 2] 512
ReLU-466 [-1, 256, 2, 2] 0
Conv2d-467 [-1, 1024, 2, 2] 262,144
BatchNorm2d-468 [-1, 1024, 2, 2] 2,048
ReLU-469 [-1, 1024, 2, 2] 0
Bottleneck-470 [-1, 1024, 2, 2] 0
Conv2d-471 [-1, 256, 2, 2] 262,144
BatchNorm2d-472 [-1, 256, 2, 2] 512
ReLU-473 [-1, 256, 2, 2] 0
Conv2d-474 [-1, 256, 2, 2] 589,824
BatchNorm2d-475 [-1, 256, 2, 2] 512
ReLU-476 [-1, 256, 2, 2] 0
Conv2d-477 [-1, 1024, 2, 2] 262,144
BatchNorm2d-478 [-1, 1024, 2, 2] 2,048
ReLU-479 [-1, 1024, 2, 2] 0
Bottleneck-480 [-1, 1024, 2, 2] 0
Conv2d-481 [-1, 512, 2, 2] 524,288
BatchNorm2d-482 [-1, 512, 2, 2] 1,024
ReLU-483 [-1, 512, 2, 2] 0
Conv2d-484 [-1, 512, 1, 1] 2,359,296
BatchNorm2d-485 [-1, 512, 1, 1] 1,024
ReLU-486 [-1, 512, 1, 1] 0
Conv2d-487 [-1, 2048, 1, 1] 1,048,576
BatchNorm2d-488 [-1, 2048, 1, 1] 4,096
Conv2d-489 [-1, 2048, 1, 1] 2,097,152
BatchNorm2d-490 [-1, 2048, 1, 1] 4,096
ReLU-491 [-1, 2048, 1, 1] 0
Bottleneck-492 [-1, 2048, 1, 1] 0
Conv2d-493 [-1, 512, 1, 1] 1,048,576
BatchNorm2d-494 [-1, 512, 1, 1] 1,024
ReLU-495 [-1, 512, 1, 1] 0
Conv2d-496 [-1, 512, 1, 1] 2,359,296
BatchNorm2d-497 [-1, 512, 1, 1] 1,024
ReLU-498 [-1, 512, 1, 1] 0
Conv2d-499 [-1, 2048, 1, 1] 1,048,576
BatchNorm2d-500 [-1, 2048, 1, 1] 4,096
ReLU-501 [-1, 2048, 1, 1] 0
Bottleneck-502 [-1, 2048, 1, 1] 0
Conv2d-503 [-1, 512, 1, 1] 1,048,576
BatchNorm2d-504 [-1, 512, 1, 1] 1,024
ReLU-505 [-1, 512, 1, 1] 0
Conv2d-506 [-1, 512, 1, 1] 2,359,296
BatchNorm2d-507 [-1, 512, 1, 1] 1,024
ReLU-508 [-1, 512, 1, 1] 0
Conv2d-509 [-1, 2048, 1, 1] 1,048,576
BatchNorm2d-510 [-1, 2048, 1, 1] 4,096
ReLU-511 [-1, 2048, 1, 1] 0
Bottleneck-512 [-1, 2048, 1, 1] 0
AdaptiveAvgPool2d-513 [-1, 2048, 1, 1] 0
Linear-514 [-1, 100] 204,900
LogSoftmax-515 [-1, 100] 0
================================================================
Total params: 58,348,708
Trainable params: 204,900
Non-trainable params: 58,143,808
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 12.40
Params size (MB): 222.58
Estimated Total Size (MB): 234.99
----------------------------------------------------------------
None
Params to learn
fc.0.weight
fc.0.bias
Files already downloaded and verified
Files already downloaded and verified
Epoch 0/9
----------
Time elapsed 0m 21s
train Loss: 7.5111 Acc: 0.1484
Time elapsed 0m 26s
valid Loss: 3.7821 Acc: 0.2493
/usr/local/lib/python3.7/dist-packages/torch/optim/lr_scheduler.py:154: UserWarning: The epoch parameter in `scheduler.step()` was not necessary and is being deprecated where possible. Please use `scheduler.step()` to step the scheduler. During the deprecation, if epoch is different from None, the closed form is used instead of the new chainable form, where available. Please open an issue if you are unable to replicate your use case: https://github.com/pytorch/pytorch/issues/new/choose.
warnings.warn(EPOCH_DEPRECATION_WARNING, UserWarning)
Optimizer learning rate: 0.0100000Epoch 1/9
----------
Time elapsed 0m 47s
train Loss: 2.9405 Acc: 0.3109
Time elapsed 0m 52s
valid Loss: 3.2014 Acc: 0.2739
Optimizer learning rate: 0.0100000Epoch 2/9
----------
Time elapsed 1m 12s
train Loss: 2.5866 Acc: 0.3622
Time elapsed 1m 17s
valid Loss: 3.2239 Acc: 0.2787
Optimizer learning rate: 0.0100000Epoch 3/9
----------
Time elapsed 1m 38s
train Loss: 2.4077 Acc: 0.3969
Time elapsed 1m 43s
valid Loss: 3.2608 Acc: 0.2811
Optimizer learning rate: 0.0100000Epoch 4/9
----------
Time elapsed 2m 4s
train Loss: 2.2742 Acc: 0.4263
Time elapsed 2m 9s
valid Loss: 3.4260 Acc: 0.2689
Optimizer learning rate: 0.0100000Epoch 5/9
----------
Time elapsed 2m 29s
train Loss: 2.1942 Acc: 0.4434
Time elapsed 2m 34s
valid Loss: 3.4697 Acc: 0.2760
Optimizer learning rate: 0.0100000Epoch 6/9
----------
Time elapsed 2m 54s
train Loss: 2.1369 Acc: 0.4583
Time elapsed 2m 59s
valid Loss: 3.5391 Acc: 0.2744
Optimizer learning rate: 0.0100000Epoch 7/9
----------
Time elapsed 3m 20s
train Loss: 2.0382 Acc: 0.4771
Time elapsed 3m 24s
valid Loss: 3.5992 Acc: 0.2721
Optimizer learning rate: 0.0100000Epoch 8/9
----------
Time elapsed 3m 45s
train Loss: 1.9776 Acc: 0.4939
Time elapsed 3m 50s
valid Loss: 3.7533 Acc: 0.2685
Optimizer learning rate: 0.0100000Epoch 9/9
----------
Time elapsed 4m 11s
train Loss: 1.9309 Acc: 0.5035
Time elapsed 4m 16s
valid Loss: 3.9663 Acc: 0.2558
Optimizer learning rate: 0.0100000Training complete in 4m 16s
Best val Acc: 0.281100
到此這篇關(guān)于PyTorch一小時(shí)掌握之遷移學(xué)習(xí)篇的文章就介紹到這了,更多相關(guān)PyTorch遷移學(xué)習(xí)內(nèi)容請搜索本站以前的文章或繼續(xù)瀏覽下面的相關(guān)文章希望大家以后多多支持本站!
版權(quán)聲明:本站文章來源標(biāo)注為YINGSOO的內(nèi)容版權(quán)均為本站所有,歡迎引用、轉(zhuǎn)載,請保持原文完整并注明來源及原文鏈接。禁止復(fù)制或仿造本網(wǎng)站,禁止在非maisonbaluchon.cn所屬的服務(wù)器上建立鏡像,否則將依法追究法律責(zé)任。本站部分內(nèi)容來源于網(wǎng)友推薦、互聯(lián)網(wǎng)收集整理而來,僅供學(xué)習(xí)參考,不代表本站立場,如有內(nèi)容涉嫌侵權(quán),請聯(lián)系alex-e#qq.com處理。
關(guān)注官方微信