% MNIST digit classification with a single-hidden-layer BP neural network;
% inputs are PCA-reduced pixel vectors (see the PCA section below).
clear;
clc;
tic;
% Load MNIST training/test data. The helpers are project-local
% (presumably the standard MNIST loader scripts); images come back as a
% matrix with one column per sample — TODO confirm against the helpers.
trainImages = loadMNISTImages('train-images.idx3-ubyte');
trainLabels = loadMNISTLabels('train-labels.idx1-ubyte');
testImages = loadMNISTImages('t10k-images.idx3-ubyte');
testLabels = loadMNISTLabels('t10k-labels.idx1-ubyte');
% PCA dimensionality reduction (doubles as normalization).
% NOTE(review): the data is not mean-centered before building the scatter
% matrix, so this is PCA on the raw (uncentered) data — confirm intended.
scatterMat = double(trainImages * trainImages');  % feature x feature scatter matrix
[eigVect, eigVal] = eig(scatterMat);
eigVal = diag(eigVal);
% eig() does not guarantee eigenvalue ordering; sort ascending explicitly
% so the cumulative-variance threshold below is well-defined.
[eigVal, order] = sort(eigVal, 'ascend');
eigVect = eigVect(:, order);
% Cumulative variance ratio over ascending eigenvalues: discard the
% leading (smallest) components that jointly hold < 5% of the total
% variance, i.e. keep components carrying > 95% of the variance.
eigVal = cumsum(eigVal) / sum(eigVal);
k = find(eigVal >= 5e-2, 1);
% 'pcaBasis' instead of 'pca': avoids shadowing MATLAB's built-in pca().
pcaBasis = eigVect(:, k:end);
trainImages = pcaBasis' * trainImages;
testImages = pcaBasis' * testImages;
% Sample dimensions (columns are samples).
[feature_size, train_size] = size(trainImages);
[~, test_size] = size(testImages);
% Network hyper-parameters.
hid_size = 100;       % number of hidden-layer neurons
out_size = 10;        % number of output neurons (one per digit 0-9)
hid_alpha = 0.5;      % hidden-layer learning rate
out_alpha = 0.5;      % output-layer learning rate
iteration = 100;      % number of training epochs
correct_sum = zeros(1, iteration);  % per-epoch test accuracy, for plotting
n = 1:iteration;                    % x-axis of the accuracy plot
error = 0;  % misclassification counter (NOTE(review): shadows built-in error())
% Weights and biases drawn uniformly from [-1, 1).
hid_weight = 2 * rand(hid_size, feature_size) - 1;
hid_bias = 2 * rand(hid_size, 1) - 1;
out_weight = 2 * rand(out_size, hid_size) - 1;
out_bias = 2 * rand(out_size, 1) - 1;
train_out = zeros(10, train_size);  % one-hot training targets, filled below
loss = zeros(train_size, 1);        % per-sample loss (only referenced by commented-out code)
testResults = zeros(test_size, 1);  % predicted digit per test sample
% Normalization (alternative preprocessing, currently disabled)
% [trainImages,PS] = mapminmax(trainImages,0,1); % row-wise normalization to [0,1]
% testImages = mapminmax('apply',testImages,PS); % apply the training min/max to the test data
% for i = 1 : train_size
% for j = 1 : feature_size
% if (trainImages(j,i) > 0.5)
% trainImages(j,i) = 1;
% else
% trainImages(j,i) = 0;
% end
% end
% end
% for i = 1 : test_size
% for j = 1 : feature_size
% if (testImages(j,i) > 0.5)
% testImages(j,i) = 1;
% else
% testImages(j,i) = 0;
% end
% end
% end
% Build the one-hot target matrix: column i gets a 1 in row (label+1).
idx = sub2ind(size(train_out), trainLabels + 1, (1:train_size)');
train_out(idx) = 1;
% Train the model with per-sample (stochastic) gradient descent on a
% squared-error loss with sigmoid activations. Pre-activations are
% z = W*x - bias, so dz/dbias = -1 and the bias update must carry the
% OPPOSITE sign of the weight update (classic BP threshold rule
% delta_theta = -eta*g). The original code ADDED the delta to the biases,
% moving them up the error gradient; fixed below.
for term = 1 : iteration
    fprintf('第%d轮\n',term);
    % Test-set accuracy with the weights as of the START of this epoch
    % (evaluation runs before the epoch's updates are applied).
    correct = 0;
    for i = 1 : test_size
        hid_layer = 1 ./ (1 + exp(-(hid_weight * testImages(:,i) - hid_bias)));
        out_layer = 1 ./ (1 + exp(-(out_weight * hid_layer - out_bias)));
        [~,pos] = max(out_layer);
        testResults(i) = pos - 1;  % predicted digit = argmax index - 1
        if (testResults(i) == testLabels(i))
            correct = correct + 1;
        end
    end
    correct_sum(term) = correct / test_size;
    % One SGD pass over the training set.
    for i = 1 : train_size
        % Forward pass: input -> hidden -> output.
        hid_layer = 1 ./ (1 + exp(-(hid_weight * trainImages(:,i) - hid_bias)));
        out_layer = 1 ./ (1 + exp(-(out_weight * hid_layer - out_bias)));
        % Backward pass: delta = (target - y) .* y .* (1 - y), the
        % sigmoid-derivative-weighted error, back-propagated to hidden.
        out_loss = (train_out(:,i) - out_layer) .* out_layer .* (1 - out_layer);
        hid_loss = out_weight' * out_loss .* hid_layer .* (1 - hid_layer);
        % Updates: W <- W + alpha*delta*x' ; bias <- bias - alpha*delta
        % (minus because z = W*x - bias).
        out_weight = out_weight + out_alpha * out_loss * hid_layer';
        out_bias = out_bias - out_alpha * out_loss;      % FIX: was '+'
        hid_weight = hid_weight + hid_alpha * hid_loss * trainImages(:,i)';
        hid_bias = hid_bias - hid_alpha * hid_loss;      % FIX: was '+'
    end
end
% Final evaluation of the trained model on the test set.
errCount = 0;  % local counter; avoids shadowing MATLAB's built-in error()
for i = 1 : test_size
    % Forward pass: input -> hidden -> output, sigmoid activations.
    hid_layer = 1 ./ (1 + exp(-(hid_weight * testImages(:,i) - hid_bias)));
    out_layer = 1 ./ (1 + exp(-(out_weight * hid_layer - out_bias)));
    % Predicted digit = index of the most active output unit, minus 1.
    [~,pos] = max(out_layer);
    testResults(i) = pos - 1;
    if (testResults(i) ~= testLabels(i))
        errCount = errCount + 1;
    end
end
% %.4f instead of %d: the accuracy is fractional, and fprintf prints
% non-integer values given to %d in exponential notation.
fprintf('正确率为%.4f\n', 1 - errCount / test_size);
plot(n, correct_sum, 'r');
title('正确率随循环次数变化曲线');
toc;