1. Implementing a neural network that supports error backpropagation:
import sys, os
sys.path.append(os.pardir)  # setting to allow importing files from the parent directory
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict

class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        # Build the layers in forward order
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x
The code above stores the network's layers in an OrderedDict, an ordered dictionary. Forward propagation therefore only needs to call each layer's forward() method in the order the layers were added, and backpropagation only needs to call the layers in the reverse order. In other words, it is enough to connect the layers in the correct order and then traverse them in sequence. This lets you assemble networks like building blocks, making it easy to construct much larger ones; the remaining methods of the class are sketched below.
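The listing above omits the loss, accuracy, and gradient methods that the training script in section 2 calls. A minimal sketch of those remaining TwoLayerNet methods, following the layer-based forward/backward pattern just described (a reconstruction, not a verbatim copy from the book):

    # The following are methods of TwoLayerNet, continuing the class above.
    def loss(self, x, t):
        # Forward propagate, then feed the scores into SoftmaxWithLoss
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = np.argmax(self.predict(x), axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)  # convert one-hot labels to class indices
        return np.sum(y == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        # Slow reference implementation, useful only for gradient checking
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grads[key] = numerical_gradient(loss_W, self.params[key])
        return grads

    def gradient(self, x, t):
        # Forward pass caches the intermediate values each layer needs
        self.loss(x, t)
        # Backward pass: call backward() on the layers in reverse order
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)
        # Each Affine layer stores its parameter gradients in dW and db
        return {'W1': self.layers['Affine1'].dW, 'b1': self.layers['Affine1'].db,
                'W2': self.layers['Affine2'].dW, 'b2': self.layers['Affine2'].db}

Because the layers live in an ordered dictionary, a deeper network only needs extra entries (for example another Affine layer and a second Relu) added to self.layers in __init__; predict() and gradient() work unchanged.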
2. Training with error backpropagation on the MNIST dataset:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
train_loss_list = []
train_acc_list = []
test_acc_list = []
iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    # Sample a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    # Compute gradients by backpropagation
    # grad = network.numerical_gradient(x_batch, t_batch)  # slow numerical alternative
    grad = network.gradient(x_batch, t_batch)
    # SGD parameter update
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    # Record accuracy once per epoch
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)
Running this prints the accuracy once per epoch (training accuracy on the left, test accuracy on the right):
0.1067 0.1099
0.9022666666666667 0.9064
0.9218666666666666 0.9214
0.9337 0.9317
0.94575 0.945
0.9511166666666667 0.9484
0.957 0.9538
0.9607333333333333 0.9572
0.9644 0.9607
0.96745 0.9625
0.97035 0.9647
0.9719166666666667 0.9659
0.97275 0.9657
0.9740166666666666 0.9658
0.97725 0.9689
0.9789666666666667 0.9705
0.9800666666666666 0.9704
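Before trusting the backpropagation gradients, it is worth verifying them against numerical gradients (a gradient check); the commented-out numerical_gradient call in the loop above is that slow alternative. A minimal sketch on a few training samples, assuming the numerical_gradient method sketched in section 1; each printed difference should be very close to zero:

grad_numerical = network.numerical_gradient(x_train[:3], t_train[:3])
grad_backprop = network.gradient(x_train[:3], t_train[:3])
for key in grad_numerical.keys():
    # Mean absolute difference between the two gradient estimates
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ':' + str(diff))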
Reference:
"Deep Learning from Scratch"