1.仅使用NDArray和autograd来实现一个线性回归:
from matplotlib import pyplot as plt
from mxnet import autograd, nd
import random
# Build a synthetic linear-regression dataset:
#   labels = true_w[0] * x0 + true_w[1] * x1 + true_b + Gaussian noise
num_inputs, num_examples = 2, 1000
true_w, true_b = [2, -3.4], 4.2
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
# Noise-free targets first; the noise term (scale=1) is added in place below.
labels = features[:, 0] * true_w[0] + features[:, 1] * true_w[1] + true_b
labels += nd.random.normal(scale=1, shape=labels.shape)
# Optional sanity checks (left disabled):
# print(features[0], labels[0])
# plt.scatter(features[:, 1].asnumpy(), labels.asnumpy())  # scatter of the 2nd feature vs. the labels
# plt.show()
# Data reading: each step returns batch_size random samples (features and labels).
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    Args:
        batch_size: Maximum number of samples per yielded batch.
        features: Indexable container of samples supporting ``len()`` and
            ``.take(indices)``.
        labels: Container aligned with ``features`` supporting
            ``.take(indices)``.

    Yields:
        ``(features.take(j), labels.take(j))`` for consecutive slices of a
        freshly shuffled index list. The last batch is shorter when
        ``batch_size`` does not divide the number of samples.
    """
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # new random visiting order on every call
    for i in range(0, num_examples, batch_size):
        # Slicing clamps at the end of the list, so no explicit min() is needed.
        j = nd.array(indices[i:i + batch_size])
        yield features.take(j), labels.take(j)
# Parameter initialisation: weights are drawn from a normal distribution with
# mean 0 and standard deviation 0.01; the bias starts at 0.
# Training needs gradients with respect to both parameters, so gradient
# storage is attached to each of them right after creation.
w = nd.random.normal(scale=0.01, shape=(num_inputs, 1))
w.attach_grad()
b = nd.zeros(shape=(1,))
b.attach_grad()
# Model definition.
def linereg(X, w, b):
    """Linear-regression forward pass.

    Args:
        X: Input samples.
        w: Weight parameter.
        b: Bias parameter, added to the product.

    Returns:
        ``nd.dot(X, w) + b``.
    """
    return nd.dot(X, w) + b
# Loss function: squared loss.
def squared_loss(y_hat, y):
    """Return the element-wise halved squared error ``(y_hat - y)**2 / 2``.

    Args:
        y_hat: Predictions.
        y: Targets with the same number of elements as ``y_hat``.

    Returns:
        An array shaped like ``y_hat`` holding the per-element loss (no
        reduction is applied).
    """
    # Reshape y to y_hat's shape so the subtraction is element-wise even when
    # the two arrive with different layouts (e.g. (n, 1) vs. (n,)).
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
# Optimisation algorithm: mini-batch stochastic gradient descent.
def sgd(params, lr, batch_size):
    """Apply one in-place SGD update to every parameter.

    Args:
        params: Iterable of parameters; each must expose a ``.grad``
            attribute and support slice assignment.
        lr: Learning rate.
        batch_size: Divisor applied to the gradient, turning a gradient
            accumulated over a batch into a per-sample average.

    Returns:
        None. Parameters are modified in place.
    """
    for param in params:
        # Assign through param[:] so the existing array is updated in place
        # instead of rebinding the local name to a new array.
        param[:] = param - lr * param.grad / batch_size
# Train the model.
batch_size = 10
lr = 0.03
num_epochs = 10  # number of passes over the training set
net = linereg
loss = squared_loss
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        # Record the forward pass so autograd can differentiate the loss.
        with autograd.record():
            batch_loss = loss(net(X, w, b), y)
        batch_loss.backward()
        sgd([w, b], lr, batch_size)
    # Report the mean loss over the full training set after each epoch.
    train_l = loss(net(features, w, b), labels)
    print('epoch为{},loss为{}'.format(epoch+1 , train_l.mean().asnumpy()))
# Compare the learned parameters with the true ones.
print('真实参数w为{},训练得到的参数w为{}'.format(true_w,w))
print('真实参数b为{},训练得到的参数为b{}'.format(true_b,b))
迭代10轮得到如下结果:
epoch为1,loss为[0.51093584]
epoch为2,loss为[0.48942772]
epoch为3,loss为[0.48564446]
epoch为4,loss为[0.48361933]
epoch为5,loss为[0.4838176]
epoch为6,loss为[0.48290232]
epoch为7,loss为[0.48315418]
epoch为8,loss为[0.4831911]
epoch为9,loss为[0.48474115]
epoch为10,loss为[0.4836766]
真实参数w为[2, -3.4],训练得到的参数w为
[[ 1.9964705]
[-3.432806 ]]
<NDArray 2x1 @cpu(0)>
真实参数b为4.2,训练得到的参数为b
[4.258036]
<NDArray 1 @cpu(0)>
2.使用MXNet的Gluon接口来实现一个线性回归:
from mxnet import autograd, nd
from mxnet import gluon , init # init模块提供了模型参数初始化的各种方法
from mxnet.gluon import data as gdata
from mxnet.gluon import loss as gloss
from mxnet.gluon import nn
# Build a synthetic linear-regression dataset:
#   labels = true_w[0] * x0 + true_w[1] * x1 + true_b + Gaussian noise
num_inputs, num_examples = 2, 1000
true_w, true_b = [2, -3.4], 4.2
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
# Noise-free targets first; the noise term (scale=1) is added in place below.
labels = features[:, 0] * true_w[0] + features[:, 1] * true_w[1] + true_b
labels += nd.random.normal(scale=1, shape=labels.shape)
# Data reading: pair the training features with their labels and read them
# back in random mini-batches.
batch_size = 10
dataset = gdata.ArrayDataset(features, labels)
data_iter = gdata.DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Model definition. A Sequential instance is a container that chains layers;
# layers are added to it in order as needed. The output layer of linear
# regression is a fully connected (Dense) layer with a single output.
net = nn.Sequential()
net.add(nn.Dense(units=1))

# Parameter initialisation: weights are drawn from a normal distribution with
# mean 0 and standard deviation 0.01; the bias is initialised to 0.
net.initialize(init.Normal(sigma=0.01))

# Loss function: squared loss, i.e. the L2-norm loss.
loss = gloss.L2Loss()

# Optimisation algorithm: a Trainer instance running mini-batch stochastic
# gradient descent ('sgd') with a learning rate of 0.03.
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': 0.03},
)
# Train the model.
num_epochs = 10  # number of passes over the training set
for epoch in range(num_epochs):
    for X, y in data_iter:
        # Record the forward pass so autograd can differentiate the loss.
        with autograd.record():
            batch_loss = loss(net(X), y)
        batch_loss.backward()
        trainer.step(batch_size)
    # Report the mean loss over the full training set after each epoch.
    train_l = loss(net(features), labels)
    print('epoch为{},loss为{}'.format(epoch+1 , train_l.mean().asnumpy()))
# Fetch the layer from net, then read its weight and bias to compare them
# with the true parameters.
print('真实参数w为{},训练得到的参数w为{}'.format(true_w,net[0].weight.data()))
print('真实参数b为{},训练得到的参数为b{}'.format(true_b,net[0].bias.data()))
迭代10轮得到如下结果:
epoch为1,loss为[0.527961]
epoch为2,loss为[0.48436356]
epoch为3,loss为[0.4837723]
epoch为4,loss为[0.4837433]
epoch为5,loss为[0.4832685]
epoch为6,loss为[0.4850266]
epoch为7,loss为[0.4834738]
epoch为8,loss为[0.48428085]
epoch为9,loss为[0.4842015]
epoch为10,loss为[0.4841783]
真实参数w为[2, -3.4],训练得到的参数w为
[[ 1.9223033 -3.4436655]]
<NDArray 1x2 @cpu(0)>
真实参数b为4.2,训练得到的参数为b
[4.227713]
<NDArray 1 @cpu(0)>
Reference:
《动手学深度学习》