
12. LR Multi-Class Classification in Practice (MNIST Dataset)

import torch
from torch.nn import functional as F
from torch import nn
from torch import optim
from torchvision import datasets, transforms

# Hyperparameters
batch_size = 200  # number of samples per batch
learning_rate = 0.01  # learning rate
epochs = 10  # number of full passes over the training set

# Training set
train_loader = torch.utils.data.DataLoader(
	datasets.MNIST('../data', train=True, download=True,  # train=True gives the training split
				   transform=transforms.Compose([  # preprocessing pipeline
					   transforms.ToTensor(),  # convert the image to a Tensor
					   transforms.Normalize((0.1307,), (0.3081,))  # standardize: subtract the mean, divide by the std
				   ])),
	batch_size=batch_size, shuffle=True)  # batch the samples (batch dimension first); shuffle=True randomizes the order
# Test set
test_loader = torch.utils.data.DataLoader(
	datasets.MNIST('../data', train=False, transform=transforms.Compose([
		transforms.ToTensor(),
		transforms.Normalize((0.1307,), (0.3081,))
	])),
	batch_size=batch_size, shuffle=True)
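# (Aside, not in the original code) The constants 0.1307 and 0.3081 are the mean
# and std of the MNIST training images; they can be reproduced roughly like this:
#   stats = datasets.MNIST('../data', train=True, transform=transforms.ToTensor())
#   imgs = torch.stack([img for img, _ in stats])
#   print(imgs.mean().item(), imgs.std().item())  # ≈ 0.1307, ≈ 0.3081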

# Create the parameters for three linear layers,
# initialized from a standard normal distribution
w1 = torch.randn(200, 784, requires_grad=True)  # input dim is 784, output dim is 200; note the (out, in) order
b1 = torch.randn(200, requires_grad=True)
w2 = torch.randn(200, 200, requires_grad=True)
b2 = torch.randn(200, requires_grad=True)
w3 = torch.randn(10, 200, requires_grad=True)  # 10 classes, so the final layer has 10 output nodes
b3 = torch.randn(10, requires_grad=True)

# Use Kaiming (He) initialization.
# The initialization scheme has a large effect on how well the network trains: without it,
# the loss barely moves and the accuracy does not improve even after several epochs,
# i.e. the gradients vanish or are too small.
torch.nn.init.kaiming_normal_(w1)
torch.nn.init.kaiming_normal_(w2)
torch.nn.init.kaiming_normal_(w3)
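# (Aside, not in the original code) With its defaults, kaiming_normal_ samples from
# N(0, std^2) with std = sqrt(2 / fan_in); for w1 (fan_in=784) that is about 0.0505.
#   print(w1.std())  # should be close to (2 / 784) ** 0.5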

def forward(x):
	"""
	Forward pass
	"""
	x = x @ w1.t() + b1
	x = F.relu(x)  # non-linear activation
	x = x @ w2.t() + b2
	x = F.relu(x)
	x = x @ w3.t() + b3
	# the ReLU on the final output is optional, since CrossEntropyLoss takes raw logits
	x = F.relu(x)
	return x
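# (Note, added) x @ w.t() + b is exactly what F.linear(x, w, b) computes,
# which is also what an nn.Linear layer does in its forward pass.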


# Define the optimizer; the variables to optimize are the three (w, b) pairs
optimizer = optim.SGD([w1, b1, w2, b2, w3, b3], lr=learning_rate)
criteon = nn.CrossEntropyLoss()
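# (Note, added) nn.CrossEntropyLoss combines log_softmax and NLLLoss internally,
# which is why forward() returns unnormalized scores rather than probabilities.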

for epoch in range(epochs):
	for batch_idx, (data, target) in enumerate(train_loader):
		data = data.view(-1, 28 * 28)

		logits = forward(data)
		loss = criteon(logits, target)

		optimizer.zero_grad()
		loss.backward()
		# print(w1.grad.norm(),w2.grad.norm())
		optimizer.step()
		if batch_idx % 100 == 0:
			print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
				epoch, batch_idx * len(data), len(train_loader.dataset),
					   100. * batch_idx / len(train_loader), loss.item()))

	# Evaluate
	test_loss = 0  # accumulated test-set loss
	correct = 0  # number of correctly classified samples
	# iterate over the (samples, labels) batches of the test set
	for data, target in test_loader:
		data = data.reshape(-1, 28 * 28)
		logits = forward(data)
		test_loss += criteon(logits, target).item()
		# the output holds 10 class scores per sample; take the max along dim=1,
		# where [1] selects the indices, i.e. the predicted classes
		pred = logits.data.max(dim=1)[1]
		# pred.eq(target) is True where prediction and label agree; sum() counts the matches
		correct += pred.eq(target.data).sum().item()
	test_loss /= len(test_loader.dataset)
	print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
		test_loss, correct, len(test_loader.dataset),
		100. * correct / len(test_loader.dataset)))

The output looks like this:

Train Epoch: 0 [0/60000 (0%)]	Loss: 3.111916
Train Epoch: 0 [20000/60000 (33%)]	Loss: 1.142314
Train Epoch: 0 [40000/60000 (67%)]	Loss: 0.728005

Test set: Average loss: 0.0020, Accuracy: 8787/10000 (87%)

Train Epoch: 1 [0/60000 (0%)]	Loss: 0.443754
Train Epoch: 1 [20000/60000 (33%)]	Loss: 0.292206
...(output truncated)

As the run above shows, the parameter-initialization scheme has a large impact on how the network performs; when a network trains poorly, the initialization is often a good first suspect.
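To make this concrete, here is a minimal sketch (not from the original post) of why standard-normal weights misbehave for a 784 → 200 layer: each pre-activation sums 784 products, so its scale grows like sqrt(fan_in), while Kaiming initialization rescales the weights to keep it near sqrt(2).

import torch

x = torch.randn(200, 784)        # a made-up batch of 784-dim inputs
w_bad = torch.randn(200, 784)    # plain standard-normal weights, std = 1
w_good = torch.randn(200, 784)
torch.nn.init.kaiming_normal_(w_good)

print((x @ w_bad.t()).std())     # roughly sqrt(784) ≈ 28: activations blow up
print((x @ w_good.t()).std())    # roughly sqrt(2) ≈ 1.4: well-scaled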

TIPS:

  • class-style API: from torch import nn, e.g. nn.CrossEntropyLoss(); these must be instantiated first and then called;
  • function-style API: from torch.nn import functional as F, e.g. F.relu and F.cross_entropy, which are called directly (see the sketch below).
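A quick side-by-side sketch of the two styles (the tensor x is a made-up placeholder):

import torch
from torch import nn
from torch.nn import functional as F

x = torch.randn(4, 10)  # hypothetical input, only to compare the two call styles

act = nn.ReLU()   # class-style: create the object first...
y1 = act(x)       # ...then call the instance

y2 = F.relu(x)    # function-style: call it directly

print(torch.equal(y1, y2))  # True: both compute the same thing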

torch.nn.CrossEntropyLoss()
This is a class: you first create an instance, then pass the prediction and target to that instance.

# Correct usage
criterion = torch.nn.CrossEntropyLoss()
loss = criterion(predict, target)
# or, in one line
loss = torch.nn.CrossEntropyLoss()(predict, target)
# Wrong usage: passing the tensors to the constructor
# raises "RuntimeError: Boolean value of Tensor with more than one value is ambiguous"
loss = torch.nn.CrossEntropyLoss(predict, target)
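If you prefer to avoid the extra instantiation step, the function-style equivalent is F.cross_entropy (the tensors below are made-up placeholders):

import torch
from torch.nn import functional as F

predict = torch.randn(4, 10)         # hypothetical logits for a batch of 4
target = torch.tensor([1, 0, 3, 9])  # hypothetical class labels
loss = F.cross_entropy(predict, target)  # no object to create
print(loss.item())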