1. Implementing dropout from scratch:
import d2lzh as d2l
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn
# Define the dropout function
def dropout(X, drop_prob):
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    # In this case all elements are dropped
    if keep_prob == 0:
        return X.zeros_like()
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob
    # Rescale the kept elements by 1/keep_prob so the expected value is unchanged
    return mask * X / keep_prob
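# Quick sanity check of dropout on a toy tensor (added sketch, not part of the original code):
# drop_prob = 0 keeps everything, drop_prob = 1 drops everything, and an intermediate
# value zeroes out elements at random while scaling the survivors.
X_demo = nd.arange(16).reshape((2, 8))
print(dropout(X_demo, 0))    # identical to X_demo
print(dropout(X_demo, 0.5))  # roughly half the entries zeroed, survivors multiplied by 2
print(dropout(X_demo, 1))    # all zeros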
# Define the model parameters
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256
w1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens1))
b1 = nd.zeros(num_hiddens1)
w2 = nd.random.normal(scale=0.01, shape=(num_hiddens1, num_hiddens2))
b2 = nd.zeros(num_hiddens2)
w3 = nd.random.normal(scale=0.01, shape=(num_hiddens2, num_outputs))
b3 = nd.zeros(num_outputs)
params = [w1, b1, w2, b2, w3, b3]
for param in params:
    param.attach_grad()
# Define the model
# The model chains fully connected layers and ReLU activations, and applies dropout
# to the output of each activation. The dropout probability of each layer can be set
# separately; the usual advice is to use a smaller dropout probability for layers
# closer to the input.
drop_prob1, drop_prob2 = 0.2, 0.5
def net(X):
    X = X.reshape((-1, num_inputs))
    H1 = (nd.dot(X, w1) + b1).relu()
    if autograd.is_training():  # apply dropout only during training
        H1 = dropout(H1, drop_prob1)
    H2 = (nd.dot(H1, w2) + b2).relu()
    if autograd.is_training():
        H2 = dropout(H2, drop_prob2)
    return nd.dot(H2, w3) + b3
# Train and test the model
num_epochs, lr, batch_size = 10, 0.5, 256
loss = gloss.SoftmaxCrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
Training for 10 epochs gives the following results:
epoch 1, loss 1.2998, train acc 0.509, test acc 0.712
epoch 2, loss 0.6325, train acc 0.763, test acc 0.815
epoch 3, loss 0.5519, train acc 0.797, test acc 0.844
epoch 4, loss 0.5066, train acc 0.818, test acc 0.848
epoch 5, loss 0.4836, train acc 0.826, test acc 0.859
epoch 6, loss 0.4608, train acc 0.832, test acc 0.862
epoch 7, loss 0.4467, train acc 0.838, test acc 0.866
epoch 8, loss 0.4323, train acc 0.841, test acc 0.857
epoch 9, loss 0.4235, train acc 0.844, test acc 0.864
epoch 10, loss 0.4143, train acc 0.850, test acc 0.869
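Because dropout in net is gated on autograd.is_training(), it is only active inside autograd.record(); at evaluation time the forward pass is deterministic. A quick check of this (a sketch added here, reusing the net and test_iter defined above):

for X, y in test_iter:
    # Outside autograd.record(), is_training() is False, so dropout is skipped
    out1 = net(X)
    out2 = net(X)
    print((out1 - out2).abs().sum().asscalar())  # expected output: 0.0
    break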
2. Concise implementation of dropout:
In Gluon, all we need to do is add a Dropout layer after each fully connected layer and specify its dropout probability. During training, the Dropout layer randomly drops the outputs of the previous layer with the given probability; at test time, the Dropout layer does nothing and simply passes its input through.
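This train/test difference is easy to see on a toy input. The following is a minimal sketch (not from the book; it only assumes the standard mxnet autograd and gluon.nn APIs already used in this post):

from mxnet import autograd, nd
from mxnet.gluon import nn

layer = nn.Dropout(0.5)   # Dropout has no parameters, so there is nothing to initialize
x = nd.ones((2, 8))
print(layer(x))           # prediction mode: the input passes through unchanged
with autograd.record():   # record() turns on training mode by default
    print(layer(x))       # roughly half the entries zeroed, the rest scaled by 2

With that behavior in mind, the full concise implementation is: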
import d2lzh as d2l
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn
# Define the model: add a Dropout layer after each fully connected hidden layer
drop_prob1, drop_prob2 = 0.2, 0.5
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'),
        nn.Dropout(drop_prob1),
        nn.Dense(256, activation='relu'),
        nn.Dropout(drop_prob2),
        nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))  # randomly initialize the model parameters
# Train and test the model
num_epochs, lr, batch_size = 10, 0.5, 256
loss = gloss.SoftmaxCrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer)
Training for 10 epochs gives the following results:
epoch 1, loss 1.2722, train acc 0.503, test acc 0.772
epoch 2, loss 0.6177, train acc 0.771, test acc 0.829
epoch 3, loss 0.5139, train acc 0.813, test acc 0.850
epoch 4, loss 0.4628, train acc 0.831, test acc 0.858
epoch 5, loss 0.4400, train acc 0.841, test acc 0.860
epoch 6, loss 0.4172, train acc 0.848, test acc 0.868
epoch 7, loss 0.4006, train acc 0.855, test acc 0.872
epoch 8, loss 0.3847, train acc 0.859, test acc 0.866
epoch 9, loss 0.3723, train acc 0.865, test acc 0.873
epoch 10, loss 0.3614, train acc 0.868, test acc 0.877
Reference:
《动手学深度学习》 (Dive into Deep Learning)