从零实现与使用 MXNet 实现 softmax 分类

1.softmax从零实现

from mxnet.gluon import data as gdata
from sklearn import datasets
from mxnet import nd,autograd
知识兔
# Load the scikit-learn digits dataset and convert it to MXNet NDArrays.
# (These statements were previously fused onto one line behind the leading
# `#`, so none of them actually executed.)
digits = datasets.load_digits()
features = nd.array(digits['data'])
labels = nd.array(digits['target'])
print(features.shape, labels.shape)

# One-hot encode the integer class labels into 10 columns.
labels_onehot = nd.one_hot(labels, 10)
print(labels_onehot.shape)
(1797, 64) (1797,)
(1797, 10)
class softmaxClassifier:
    """Softmax regression trained with mini-batch gradient descent.

    The model is a single affine map followed by a row-wise softmax.
    Gradients are obtained through MXNet autograd, and targets are expected
    as one-hot rows (``loss`` and ``accuracyScore`` both rely on that).
    """

    def __init__(self, inputs, outputs):
        """Create the parameters.

        Args:
            inputs: number of input features per sample.
            outputs: number of classes.
        """
        self.inputs = inputs
        self.outputs = outputs

        # Small random weights, zero bias; both tracked by autograd.
        self.weight = nd.random.normal(scale=0.01, shape=(inputs, outputs))
        self.bias = nd.zeros(shape=(1, outputs))
        self.weight.attach_grad()
        self.bias.attach_grad()

    def forward(self, x):
        """Return the class probabilities ``softmax(x . W + b)`` for ``x``."""
        output = nd.dot(x, self.weight) + self.bias
        return self._softmax(output)

    def _softmax(self, x):
        """Row-wise softmax of a 2-D array of logits."""
        # Softmax is invariant to a per-row shift; subtracting the row
        # maximum keeps exp() from overflowing on large logits.
        shifted = x - x.max(axis=1, keepdims=True)
        step1 = shifted.exp()
        step2 = step1.sum(axis=1, keepdims=True)
        return step1 / step2

    def _bgd(self, params, learning_rate, batch_size):
        """Batch gradient descent: update every parameter in place.

        NOTE(review): ``loss`` already divides by the number of samples, so
        the extra ``/ batch_size`` here scales the step down a second time.
        It is kept as is because the chosen learning rate was tuned with it.
        """
        for param in params:
            # Uses the gradient computed by MXNet autograd.
            param[:] = param - param.grad * learning_rate / batch_size

    def loss(self, y_pred, y):
        """Mean cross-entropy between probabilities ``y_pred`` and one-hot ``y``."""
        return nd.sum((-y * y_pred.log())) / len(y)

    def dataIter(self, x, y, batch_size):
        """Return a shuffling ``DataLoader`` over ``(x, y)`` mini-batches."""
        dataset = gdata.ArrayDataset(x, y)
        return gdata.DataLoader(dataset, batch_size, shuffle=True)

    def fit(self, x, y, learning_rate, epoches, batch_size):
        """Train on ``(x, y)`` and print progress every 50 epochs.

        Args:
            x: feature matrix.
            y: one-hot label matrix.
            learning_rate: step size passed to ``_bgd``.
            epoches: number of passes over the data.
            batch_size: mini-batch size.
        """
        for epoch in range(epoches):
            for x_batch, y_batch in self.dataIter(x, y, batch_size):
                with autograd.record():
                    y_pred = self.forward(x_batch)
                    l = self.loss(y_pred, y_batch)
                l.backward()
                self._bgd([self.weight, self.bias], learning_rate, batch_size)
            if epoch % 50 == 0:
                # NOTE(review): this fires at epoch indices 0, 50, 100, ...
                # but labels them 50, 100, 150, ...; the label is kept so the
                # printed format stays the same.
                y_all_pred = self.forward(x)
                print('epoch:{},loss:{},accuracy:{}'.format(epoch+50, self.loss(y_all_pred, y), self.accuracyScore(y_all_pred, y)))

    def predict(self, x):
        """Return the predicted class index for every row of ``x``."""
        y_pred = self.forward(x)
        # Classes lie along axis 1. The previous axis=0 returned, for each
        # class, the index of the sample with the highest probability.
        return y_pred.argmax(axis=1)

    def accuracyScore(self, y_pred, y):
        """Fraction of rows where the arg-max of ``y_pred`` matches one-hot ``y``."""
        acc_sum = (y_pred.argmax(axis=1) == y.argmax(axis=1)).sum().asscalar()
        return acc_sum / len(y)
# Build a classifier for 64 input features and 10 classes, then train it on
# the full digits set with one-hot targets.
sfm_clf = softmaxClassifier(64, 10)
sfm_clf.fit(
    features,
    labels_onehot,
    learning_rate=0.1,
    epoches=500,
    batch_size=200,
)
epoch:50,loss:[1.9941667]<NDArray 1 @cpu(0)>,accuracy:0.3550361713967724
epoch:100,loss:[0.37214527]<NDArray 1 @cpu(0)>,accuracy:0.9393433500278241
epoch:150,loss:[0.25443634]<NDArray 1 @cpu(0)>,accuracy:0.9549248747913188
epoch:200,loss:[0.20699367]<NDArray 1 @cpu(0)>,accuracy:0.9588202559821926
epoch:250,loss:[0.1799827]<NDArray 1 @cpu(0)>,accuracy:0.9660545353366722
epoch:300,loss:[0.1619963]<NDArray 1 @cpu(0)>,accuracy:0.9677239844184753
epoch:350,loss:[0.14888664]<NDArray 1 @cpu(0)>,accuracy:0.9716193656093489
epoch:400,loss:[0.13875261]<NDArray 1 @cpu(0)>,accuracy:0.9738452977184195
epoch:450,loss:[0.13058177]<NDArray 1 @cpu(0)>,accuracy:0.9760712298274903
epoch:500,loss:[0.12379646]<NDArray 1 @cpu(0)>,accuracy:0.9777406789092933
# Show the model's predictions for the first ten samples next to their
# true labels.
first_ten_predictions = sfm_clf.predict(features[:10])
first_ten_labels = labels[:10]
print('预测结果:', first_ten_predictions)
print('真实结果:', first_ten_labels)
预测结果: [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]<NDArray 10 @cpu(0)>
真实结果: [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]<NDArray 10 @cpu(0)>

2.使用mxnet实现softmax分类

from mxnet import gluon, nd, autograd, init
from mxnet.gluon import nn, trainer, loss as gloss, data as gdata

# Model: a single fully connected layer with 10 output units.
net = nn.Sequential()
net.add(nn.Dense(10))

# Initialise the parameters from a normal distribution with sigma = 0.01.
net.initialize(init=init.Normal(sigma=0.01))

# Loss: softmax cross-entropy; sparse_label=False because the targets are
# one-hot rows rather than class indices.
loss = gloss.SoftmaxCrossEntropyLoss(sparse_label=False)

# Optimiser: plain SGD over all parameters of the network.
optimizer = trainer.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': 0.1},
)

# Training configuration and shuffled mini-batch iterator.
epoches = 500
batch_size = 200
dataset = gdata.ArrayDataset(features, labels_onehot)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

for epoch in range(epoches):
    for x_batch, y_batch in data_iter:
        with autograd.record():
            l = loss(net.forward(x_batch), y_batch).sum() / batch_size
        l.backward()
        # NOTE(review): the loss is already divided by batch_size above and
        # step(batch_size) rescales the gradient again; confirm this double
        # normalisation is intended.
        optimizer.step(batch_size)

    # Only report on epoch indices 0, 50, 100, ...
    if epoch % 50 != 0:
        continue

    # Evaluate on the full dataset. Note the printed label is epoch+50, not
    # the epoch index.
    y_all_pred = net.forward(features)
    acc_sum = (y_all_pred.argmax(axis=1) == labels_onehot.argmax(axis=1)).sum().asscalar()
    print('epoch:{},loss:{},accuracy:{}'.format(
        epoch+50,
        loss(y_all_pred, labels_onehot).sum() / len(labels_onehot),
        acc_sum/len(y_all_pred),
    ))
epoch:50,loss:[2.1232333]<NDArray 1 @cpu(0)>,accuracy:0.24652198107957707
epoch:100,loss:[0.37193483]<NDArray 1 @cpu(0)>,accuracy:0.9410127991096272
epoch:150,loss:[0.25408813]<NDArray 1 @cpu(0)>,accuracy:0.9543683917640512
epoch:200,loss:[0.20680156]<NDArray 1 @cpu(0)>,accuracy:0.9627156371730662
epoch:250,loss:[0.1799252]<NDArray 1 @cpu(0)>,accuracy:0.9666110183639399
epoch:300,loss:[0.16203885]<NDArray 1 @cpu(0)>,accuracy:0.9699499165275459
epoch:350,loss:[0.14899409]<NDArray 1 @cpu(0)>,accuracy:0.9738452977184195
epoch:400,loss:[0.13890252]<NDArray 1 @cpu(0)>,accuracy:0.9749582637729549
epoch:450,loss:[0.13076076]<NDArray 1 @cpu(0)>,accuracy:0.9755147468002225
epoch:500,loss:[0.1239901]<NDArray 1 @cpu(0)>,accuracy:0.9777406789092933
计算机