%% I. Reset the environment
clear;
clc;
%% II. Import data
% Raw data is read from sheet "ori" of the Excel workbook; column 155 is used
% below as the target value. (Original author's note: 50 samples, 159 columns.)
Sample_ori = readmatrix("E:\dfy\heavy_metals\code\HM_dfy\heavymetal.xlsx","Sheet","ori");
% Pre-selected feature matrix; XSel_CARS.mat is expected to define XSel_CARS.
load('XSel_CARS.mat');
Sample_pre = XSel_CARS(:,:);
%% Spectral preprocessing (disabled alternatives kept for reference)
% [m,n] = size(Sample_ori); % m: number of samples, n: number of columns
% y=Sample_ori(:,1:n-5); % spectra = all columns except the last five
% % Sample = DWT(y);
% % Sample = SMOOTH(y);
% % Sample = snv(y);
% % y_m=mean(y);
% % Sample=msc(y,y_m); % multiplicative scatter correction
% % Sample = savgol(y,7,3,1);
% ttt = Sample_ori(:,159);
% Sample = dosc(y,ttt,2,0.1);
%% Train / test split
[m,n] = size(Sample_ori);
% Features (Sample_pre) and labels (Sample_ori) are indexed with the same row
% numbers, so they must be row-aligned. Fail early with a clear message rather
% than hitting an index error or silently pairing features with wrong labels.
assert(size(Sample_pre,1) == m, ...
    'Sample_pre has %d rows but Sample_ori has %d; features and labels must be row-aligned.', ...
    size(Sample_pre,1), m);
assert(size(Sample_pre,2) >= 19, ...
    'Sample_pre has %d columns; at least 19 feature columns are required.', size(Sample_pre,2));
assert(n >= 155, ...
    'Sample_ori has %d columns; column 155 (target value) is required.', n);
ratio = 2/3;                 % fraction of rows used for training
m_train = round(m*ratio);    % number of training rows (first m_train rows)
% Inputs are transposed so that each column is one sample.
P_train = Sample_pre(1:m_train,1:19)';
P_test = Sample_pre(m_train+1:m,1:19)';
% Targets: column 155 of the raw data, as row vectors.
T_train = Sample_ori(1:m_train,155)';
T_test = Sample_ori(m_train+1:m,155)';
%% ELM creation / training
% elmtrain is a project helper (not shown here); called with 80 hidden
% neurons, the 'sig' transfer function and TYPE = 0.
% Original author's note: results are best when the hidden-neuron count is
% about 1/3 of the number of training samples.
[IW,B,LW,TF,TYPE] = elmtrain(P_train,T_train,80,'sig',0);
%% ELM simulation
T_sim_1 = elmpredict(P_train,IW,B,LW,TF,TYPE);   % predictions on the training set
T_sim_2 = elmpredict(P_test,IW,B,LW,TF,TYPE);    % predictions on the test set
%% Result comparison
% Side-by-side [actual, predicted] columns for inspection.
result_1 = [T_train' T_sim_1'];
result_2 = [T_test' T_sim_2'];
% Training-set mean squared error
E = mse(T_sim_1-T_train);
% Training-set coefficient of determination (squared Pearson correlation)
N = length(T_train);
R2 = (N*sum(T_sim_1.*T_train) - sum(T_sim_1)*sum(T_train))^2 / ...
     ((N*sum((T_sim_1).^2) - (sum(T_sim_1))^2) * (N*sum((T_train).^2) - (sum(T_train))^2));
% Test-set mean squared error
E1 = mse(T_sim_2-T_test);
% Test-set coefficient of determination. The sample count here must be the
% test-set size N1; using the training-set size N gives a wrong value
% whenever the two sets differ in length.
N1 = length(T_test);
R21 = (N1*sum(T_sim_2.*T_test) - sum(T_sim_2)*sum(T_test))^2 / ...
      ((N1*sum((T_sim_2).^2) - (sum(T_sim_2))^2) * (N1*sum((T_test).^2) - (sum(T_test))^2));
%% Plotting
% Figure 1: actual vs. predicted values on the training set.
figure(1)
plot(1:N,T_train,'r-*',1:N,T_sim_1,'b:o');
% Keep the original 1..40 window, but widen it if there are more samples so
% that no points are clipped.
axis([1,max(N,40),0.,300.0]);
grid on
legend('真实值','预测值')
xlabel('样本编号')
ylabel('样本划分')
% Named title_train rather than "string" to avoid shadowing MATLAB's
% built-in string function for the rest of the session.
title_train = {'训练集样本含量预测结果对比(ELM)';['(mse = ' num2str(E) ' R^2 = ' num2str(R2) ')']};
title(title_train)
% Figure 2: actual vs. predicted values on the test set.
figure(2)
plot(1:N1,T_test,'r-*',1:N1,T_sim_2,'b:o');
% Same idea as above: at least the original 1..20 window.
axis([1,max(N1,20),0.,300.0]);
grid on
legend('真实值','预测值')
xlabel('样本编号')
ylabel('样本划分')
title_test = {'测试集样本含量预测结果对比(ELM)';['(mse= ' num2str(E1) ' R^2 = ' num2str(R21) ')']};
title(title_test)