1. Implementing softmax from scratch
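Two formulas drive everything below: softmax turns a vector of raw scores z into a probability distribution, and training minimizes the cross-entropy between the one-hot labels and the predicted probabilities, averaged over the n samples in a batch:

$$\mathrm{softmax}(z)_j = \frac{e^{z_j}}{\sum_k e^{z_k}}, \qquad \ell(Y, \hat{Y}) = -\frac{1}{n}\sum_{i=1}^{n}\sum_{j} y_{ij}\,\log \hat{y}_{ij}$$

The _softmax and loss methods of the class defined below are direct transcriptions of these two expressions.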
from mxnet.gluon import data as gdata
from sklearn import datasets
from mxnet import nd, autograd
# Load the digits dataset
digits = datasets.load_digits()
features, labels = nd.array(digits['data']), nd.array(digits['target'])
print(features.shape, labels.shape)
labels_onehot = nd.one_hot(labels, 10)
print(labels_onehot.shape)
(1797, 64) (1797,)
(1797, 10)
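As a quick check on what nd.one_hot does: each integer label becomes a row with a single 1 at the label's index. A minimal illustration (the toy values here are not from the original run):

from mxnet import nd

print(nd.one_hot(nd.array([0, 2, 1]), 3))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]
# <NDArray 3x3 @cpu(0)>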
class softmaxClassifier:
    def __init__(self, inputs, outputs):
        self.inputs = inputs
        self.outputs = outputs
        # Small random weights and zero bias, with gradients attached for autograd
        self.weight = nd.random.normal(scale=0.01, shape=(inputs, outputs))
        self.bias = nd.zeros(shape=(1, outputs))
        self.weight.attach_grad()
        self.bias.attach_grad()

    def forward(self, x):
        output = nd.dot(x, self.weight) + self.bias
        return self._softmax(output)

    def _softmax(self, x):
        step1 = x.exp()
        step2 = step1.sum(axis=1, keepdims=True)
        return step1 / step2

    def _bgd(self, params, learning_rate, batch_size):
        '''Mini-batch gradient descent'''
        for param in params:
            # Use the gradient computed by mxnet's autograd directly
            param[:] = param - param.grad * learning_rate / batch_size

    def loss(self, y_pred, y):
        # Cross-entropy averaged over the batch
        return nd.sum(-y * y_pred.log()) / len(y)

    def dataIter(self, x, y, batch_size):
        dataset = gdata.ArrayDataset(x, y)
        return gdata.DataLoader(dataset, batch_size, shuffle=True)

    def fit(self, x, y, learning_rate, epoches, batch_size):
        for epoch in range(epoches):
            for x_batch, y_batch in self.dataIter(x, y, batch_size):
                with autograd.record():
                    y_pred = self.forward(x_batch)
                    l = self.loss(y_pred, y_batch)
                l.backward()
                self._bgd([self.weight, self.bias], learning_rate, batch_size)
            if epoch % 50 == 0:
                y_all_pred = self.forward(x)
                print('epoch:{},loss:{},accuracy:{}'.format(
                    epoch + 50, self.loss(y_all_pred, y),
                    self.accuracyScore(y_all_pred, y)))

    def predict(self, x):
        y_pred = self.forward(x)
        # argmax over the class axis gives the predicted label per sample
        return y_pred.argmax(axis=1)

    def accuracyScore(self, y_pred, y):
        acc_sum = (y_pred.argmax(axis=1) == y.argmax(axis=1)).sum().asscalar()
        return acc_sum / len(y)
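One caveat about _softmax above: it exponentiates raw scores directly, so very large scores would overflow exp(). The scores stay small on this dataset, but a numerically stable variant subtracts the per-row maximum first, which leaves the result mathematically unchanged. A sketch (not part of the original code):

def softmax_stable(x):
    # exp(z - max) avoids overflow; dividing by the row sum renormalizes,
    # so the output equals the naive version in exact arithmetic
    shifted = x - x.max(axis=1, keepdims=True)
    exp = shifted.exp()
    return exp / exp.sum(axis=1, keepdims=True)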
sfm_clf = softmaxClassifier(64, 10)
sfm_clf.fit(features, labels_onehot, learning_rate=0.1, epoches=500, batch_size=200)
epoch:50,loss:[1.9941667]<NDArray 1 @cpu(0)>,accuracy:0.3550361713967724
epoch:100,loss:[0.37214527]<NDArray 1 @cpu(0)>,accuracy:0.9393433500278241
epoch:150,loss:[0.25443634]<NDArray 1 @cpu(0)>,accuracy:0.9549248747913188
epoch:200,loss:[0.20699367]<NDArray 1 @cpu(0)>,accuracy:0.9588202559821926
epoch:250,loss:[0.1799827]<NDArray 1 @cpu(0)>,accuracy:0.9660545353366722
epoch:300,loss:[0.1619963]<NDArray 1 @cpu(0)>,accuracy:0.9677239844184753
epoch:350,loss:[0.14888664]<NDArray 1 @cpu(0)>,accuracy:0.9716193656093489
epoch:400,loss:[0.13875261]<NDArray 1 @cpu(0)>,accuracy:0.9738452977184195
epoch:450,loss:[0.13058177]<NDArray 1 @cpu(0)>,accuracy:0.9760712298274903
epoch:500,loss:[0.12379646]<NDArray 1 @cpu(0)>,accuracy:0.9777406789092933
print('Predictions:', sfm_clf.predict(features[:10]))
print('Ground truth:', labels[:10])
Predictions: [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.] <NDArray 10 @cpu(0)>
Ground truth: [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.] <NDArray 10 @cpu(0)>
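Note that the accuracy reported above is measured on the same data the model was trained on. To gauge generalization one would hold out a test set; a minimal sketch using sklearn's train_test_split (the 80/20 split and the seed are illustrative choices, not from the original run):

from sklearn.model_selection import train_test_split

# Hypothetical held-out evaluation reusing the class defined above
x_train, x_test, y_train, y_test = train_test_split(
    digits['data'], digits['target'], test_size=0.2, random_state=42)
x_train, x_test = nd.array(x_train), nd.array(x_test)
y_train_oh = nd.one_hot(nd.array(y_train), 10)
y_test_oh = nd.one_hot(nd.array(y_test), 10)

clf = softmaxClassifier(64, 10)
clf.fit(x_train, y_train_oh, learning_rate=0.1, epoches=500, batch_size=200)
print('test accuracy:', clf.accuracyScore(clf.forward(x_test), y_test_oh))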
2. Implementing softmax classification with mxnet
from mxnet import gluon, nd, autograd, init
from mxnet.gluon import nn, trainer, loss as gloss, data as gdata

# Define the model: a single dense layer; softmax is folded into the loss
net = nn.Sequential()
net.add(nn.Dense(10))

# Initialize the parameters
net.initialize(init=init.Normal(sigma=0.01))

# Loss function: cross-entropy over one-hot labels
loss = gloss.SoftmaxCrossEntropyLoss(sparse_label=False)

# Optimizer
optimizer = trainer.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

# Training
epoches = 500
batch_size = 200
dataset = gdata.ArrayDataset(features, labels_onehot)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
for epoch in range(epoches):
    for x_batch, y_batch in data_iter:
        with autograd.record():
            l = loss(net.forward(x_batch), y_batch).sum() / batch_size
        l.backward()
        optimizer.step(batch_size)
    if epoch % 50 == 0:
        y_all_pred = net.forward(features)
        acc_sum = (y_all_pred.argmax(axis=1) == labels_onehot.argmax(axis=1)).sum().asscalar()
        print('epoch:{},loss:{},accuracy:{}'.format(
            epoch + 50,
            loss(y_all_pred, labels_onehot).sum() / len(labels_onehot),
            acc_sum / len(y_all_pred)))
epoch:50,loss:[2.1232333]<NDArray 1 @cpu(0)>,accuracy:0.24652198107957707
epoch:100,loss:[0.37193483]<NDArray 1 @cpu(0)>,accuracy:0.9410127991096272
epoch:150,loss:[0.25408813]<NDArray 1 @cpu(0)>,accuracy:0.9543683917640512
epoch:200,loss:[0.20680156]<NDArray 1 @cpu(0)>,accuracy:0.9627156371730662
epoch:250,loss:[0.1799252]<NDArray 1 @cpu(0)>,accuracy:0.9666110183639399
epoch:300,loss:[0.16203885]<NDArray 1 @cpu(0)>,accuracy:0.9699499165275459
epoch:350,loss:[0.14899409]<NDArray 1 @cpu(0)>,accuracy:0.9738452977184195
epoch:400,loss:[0.13890252]<NDArray 1 @cpu(0)>,accuracy:0.9749582637729549
epoch:450,loss:[0.13076076]<NDArray 1 @cpu(0)>,accuracy:0.9755147468002225
epoch:500,loss:[0.1239901]<NDArray 1 @cpu(0)>,accuracy:0.9777406789092933
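As an aside, gloss.SoftmaxCrossEntropyLoss defaults to sparse_label=True, so the nd.one_hot conversion can be skipped entirely by passing the integer class indices as labels. A small check that both forms agree (a sketch reusing the objects defined above):

# The same loss computed from one-hot labels and from integer labels
scores = net.forward(features[:5])
l_onehot = gloss.SoftmaxCrossEntropyLoss(sparse_label=False)(scores, labels_onehot[:5])
l_sparse = gloss.SoftmaxCrossEntropyLoss()(scores, labels[:5])
print(l_onehot, l_sparse)  # the two NDArrays should match elementwise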