% BP neural network trained with mini-batch gradient descent and the Adam
% optimizer (a momentum average combined with an RMSProp-style squared-gradient average)
% x: sample inputs, one row per sample; y: sample outputs;
% hide_node_array: number of neurons in each hidden layer
% w: weights; b: biases (cell arrays, one entry per layer)
function [w, b] = BP_Net_Adam(x, y, hide_node_array, maxItorNum)
eps = 1e-10; % convergence threshold on the mean output error
step = 50; % mini-batch size
% Node counts per layer: input, hidden layers, output
[xnum, xlen] = size(x);
[~, ylen] = size(y);
node_array = [xlen, hide_node_array, ylen];
[Mw, Mb] = init_gradient_w_b(node_array); % first-moment (momentum) moving averages
[Gw, Gb] = init_gradient_w_b(node_array); % second-moment (squared-gradient) moving averages
% Initialize the weights and biases
[w, b] = init_w_b(node_array);
L = length(node_array) - 1;
d_error = 1;
num = 0;
error_list = [];
while abs(d_error) > eps
% Iterate over the samples, one mini-batch at a time
sum_error = 0;
k = 1;
while k <= xnum
[avg_gradient_w, avg_gradient_b] = init_gradient_w_b(node_array); % zero-initialized accumulator for the batch-average gradient
batch = min(step, xnum - k + 1); % guard: the last batch may hold fewer than step samples
for i = 1: batch
% Forward pass: compute the layer inputs and outputs for sample k+i-1
[layer_in, layer_out] = net_value(w, b, x(k + i - 1, :));
sum_error = sum_error + abs(layer_out{L + 1} - y(k + i - 1, :)); % accumulate the output-layer error
% Gradients of w and b for this sample
[gradient_w, gradient_b] = calc_gradient(layer_in, layer_out, w, y(k + i - 1, :));
% Running average of the gradients over the batch
[avg_gradient_w, avg_gradient_b] = calc_avg_gradient(i, avg_gradient_w, avg_gradient_b, gradient_w, gradient_b);
end
k = k + step;
% Adam update of w and b using the batch-average gradient
[Mw, Mb] = update_Mwb(Mw, Mb, avg_gradient_w, avg_gradient_b);
[Gw, Gb] = update_Gwb(Gw, Gb, avg_gradient_w, avg_gradient_b);
[vw, vb] = update_v(Mw, Mb, Gw, Gb);
[w, b] = adjust_w_b(w, b, vw, vb);
end
d_error = sum_error / xnum; % mean output-layer error over this epoch
num = num + 1;
fprintf('Iteration number = %d; d_error = %g\n', num, d_error);
error_list = [error_list, abs(d_error)];
if mod(num, 30) == 0 % refresh the fit curve and error plot every 30 epochs
hold on;
plotf(w, b, num);
plot_error(error_list);
pause(0.001);
end
if num > maxItorNum
fprintf('d_error = %g\n', d_error);
break;
end
end
end
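%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Usage sketch. This is illustrative only and assumes that net_value,
% test_func and sigmod_func are provided in sibling files of this project
% (they are referenced in this file but not defined here):
%   x = linspace(-2*pi, 2*pi, 200)';            % training inputs, one sample per row
%   y = test_func(x);                           % training targets
%   [w, b] = BP_Net_Adam(x, y, [10, 10], 3000); % two hidden layers of 10 neurons, 3000 epochs max
%   [~, out] = net_value(w, b, 0.5);            % forward pass on a new input
%   y_hat = out{length(b) + 1};                 % network prediction
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%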
% Initialize the weights and biases
function [w, b] = init_w_b(node_array)
layer = length(node_array);
w = cell(1, layer-1);
b = cell(1, layer-1);
for i = 2: layer
input_node_num = node_array(i-1);
node_num = node_array(i);
w{i-1} = rands(node_num, input_node_num); % rands: symmetric random initialization in [-1, 1] (Neural Network Toolbox)
b{i-1} = rands(node_num);
end
end
% Initialize the gradient cell arrays to zero
function [gw, gb] = init_gradient_w_b(node_array)
layer = length(node_array);
gw = cell(1, layer-1);
gb = cell(1, layer-1);
for i = 2: layer
input_node_num = node_array(i-1);
node_num = node_array(i);
gw{i-1} = zeros(node_num, input_node_num);
gb{i-1} = zeros(node_num, 1); % column vector (zeros(n) alone would create an n-by-n matrix)
end
end
% Compute the gradients of w and b by backpropagation
function [gradient_w, gradient_b] = calc_gradient(layer_in, layer_out, w, y)
% 1) Gradients of w and b for the output layer
% error{l} holds the back-propagated error (delta) of layer l
% Number of layers, excluding the input layer
L = length(layer_in);
error = cell(1, L);
d = layer_out{L + 1};
% Output-layer error (the output layer uses a linear activation)
error{L} = (d-y);
% Gradients of w and b for the output layer
gradient_w{L} = calc_gradient_w(error{L}, layer_out{L});
gradient_b{L} = error{L};
% 2) Gradients of w and b for the hidden layers (L-1 down to 1)
for i = 1: L-1
layer = L - i;
error{layer} = calc_error_2(error{layer+1}, w{layer+1}, layer_in{layer});
gradient_w{layer} = calc_gradient_w(error{layer}, layer_out{layer});
gradient_b{layer} = error{layer};
end
end
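% As a sketch of the math the function above implements (notation mine:
% a_l = layer_out{l}, z_l = layer_in{l}, f the hidden-layer activation):
%   output layer (linear):  delta_L = a_{L+1} - y
%   hidden layer l:         delta_l = (w_{l+1}' * delta_{l+1}) .* f'(z_l)
%   weight gradient:        dE/dw_l = delta_l * a_l'
%   bias gradient:          dE/db_l = delta_l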
% Gradient of w for one layer: outer product of the layer error and the previous layer's output
function [gradient_w] = calc_gradient_w(delta, pre_layer_out)
node_num = length(delta); % number of neurons (and error terms) in this layer
input_node_num = length(pre_layer_out); % number of neurons in the previous layer
gradient_w = zeros(node_num, input_node_num);
% One weight gradient per (neuron, input) pair
for i = 1: node_num
for j = 1: input_node_num
gradient_w(i, j) = delta(i).*pre_layer_out(j);
end
end
end
% Back-propagate the error to a hidden layer: delta_i = f'(z_i) * sum_j(delta_next_j * w_ji)
function delta = calc_error_2(back_error, back_w, layer_in)
diff_f = @sigmod_diff_func;
node_num = length(layer_in);
back_node_num = length(back_error);
delta = zeros(node_num, 1);
for i = 1: node_num
for j = 1: back_node_num
delta(i) = delta(i) + back_error(j)*back_w(j, i)*diff_f(layer_in(i));
end
end
end
% Incrementally update the mean gradient over the batch: avg_k = ((k-1)*avg_{k-1} + g_k)/k
function [avg_gradient_w_new, avg_gradient_b_new] = calc_avg_gradient(iteratorNum, avg_gradient_w, avg_gradient_b, gradient_w, gradient_b)
avg_gradient_w_new = avg_gradient_w;
avg_gradient_b_new = avg_gradient_b;
if iteratorNum == 1
avg_gradient_w_new = gradient_w;
avg_gradient_b_new = gradient_b;
else
node_num = length(gradient_b);
for i = 1: node_num
avg_gradient_w_new{i} = (avg_gradient_w{i} * (iteratorNum - 1) + gradient_w{i})./iteratorNum;
avg_gradient_b_new{i} = (avg_gradient_b{i} * (iteratorNum - 1) + gradient_b{i})./iteratorNum;
end
end
end
% Update the first-moment (momentum) exponential moving average
function [Mw_new, Mb_new] = update_Mwb(Mw, Mb, gradient_w, gradient_b)
beta1 = 0.9;
Mw_new = gradient_w;
Mb_new = gradient_b;
node_num = length(gradient_b);
for i = 1: node_num
Mw_new{i} = beta1*Mw{i} + (1-beta1)*gradient_w{i};
Mb_new{i} = beta1*Mb{i} + (1-beta1)*gradient_b{i};
end
end
% Update the second-moment (squared-gradient) exponential moving average
function [Gw_new, Gb_new] = update_Gwb(Gw, Gb, gradient_w, gradient_b)
beta2 = 0.999;
Gw_new = gradient_w;
Gb_new = gradient_b;
node_num = length(gradient_b);
for i = 1: node_num
Gw_new{i} = beta2*Gw{i} + (1-beta2)*gradient_w{i}.*gradient_w{i};
Gb_new{i} = beta2*Gb{i} + (1-beta2)*gradient_b{i}.*gradient_b{i};
end
end
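% Note: update_Mwb above keeps a momentum-style average of the gradient and
% update_Gwb keeps an RMSProp-style average of its square; combining the two
% with the bias correction in calc_v below is what makes this an Adam update.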
% Compute the parameter update ("velocity") for w and b from the Adam moments
function [vw, vb] = update_v(Mw, Mb, Gw, Gb)
vw = Gw;
vb = Gb;
node_num = length(vb);
for i = 1: node_num
vw{i} = calc_v(Mw{i}, Gw{i});
vb{i} = calc_v(Mb{i}, Gb{i});
end
end
% Bias correction and step computation. Note that beta1_t and beta2_t are
% kept as fixed constants here instead of being decayed with the step count.
function vv = calc_v(m, v)
beta1_t = 1/9;
beta2_t = 1/999;
alpha = 0.01; % learning rate
epsilon = 1e-8;
m_ = m./(1 - beta1_t); % bias-corrected first moment
v_ = v./(1 - beta2_t); % bias-corrected second moment
vv = alpha./sqrt(v_ + epsilon).*m_;
end
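% For reference: textbook Adam decays the bias-correction terms with the
% step count t (beta1^t, beta2^t) and divides by (sqrt(v_hat) + epsilon).
% The helper below is a hypothetical per-step variant, shown only as a
% sketch; it is not called anywhere in this file.
function vv = calc_v_per_step(m, v, t)
beta1 = 0.9; % must match update_Mwb
beta2 = 0.999; % must match update_Gwb
alpha = 0.01; % learning rate
epsilon = 1e-8;
m_hat = m./(1 - beta1^t); % bias-corrected first moment
v_hat = v./(1 - beta2^t); % bias-corrected second moment
vv = alpha.*m_hat./(sqrt(v_hat) + epsilon);
end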
% Apply the update step to w and b
function [w_new, b_new] = adjust_w_b(w, b, vw, vb)
w_new = w;
b_new = b;
node_num = length(vb);
for i = 1: node_num
w_new{i} = w{i} - vw{i};
b_new{i} = b{i} - vb{i};
end
end
% Derivative of the activation function, computed as 1 - f(x).^2
% (the tanh/tansig form; assumes sigmod_func is a tanh-like activation)
function v = sigmod_diff_func(x)
f = @sigmod_func;
v = 1-f(x).*f(x);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot the target samples and the current network fit
function plotf(w, b, num)
f = @test_func;
x = linspace(-2*pi, 2*pi, 100)';
y = f(x);
tx = -2*pi:0.01:2*pi;
ty = tx;
index = 1;
for xi = tx
[~,o] = net_value(w, b, xi);
ty(index) = o{length(b) + 1};
index = index + 1;
end
subplot(2, 1, 1);cla reset;
plot(x, y, '*r');
hold on;
plot(tx, ty, '-g');
legend('points on test\_func', 'net fit line'); % escape the underscore so the TeX interpreter does not render a subscript
title(['Iteration Num = ', num2str(num)]);
end
% Plot the training error curve
function plot_error(errorList)
subplot(2, 1, 2);cla reset;
iter_num = 1: length(errorList);
last_error = errorList(length(errorList)); % most recent error value
plot(iter_num, errorList);
title(['error = ', num2str(last_error)]);
end