function [ forest ] = RandomForest_train(X,label,para )
%RANDOMFOREST_TRAIN Train a random forest (an ensemble of decision trees).
% X: n-by-d matrix; n instances, d features
% label: n-by-1 integer vector of class labels
% para: struct with fields:
%   para.treeNum        : number of decision trees in the forest
%   para.bagsize        : (0,inf); each tree is trained on ceil(n*para.bagsize)
%                         samples drawn uniformly WITH replacement (bootstrap)
%   para.nodeInputFeaNum: max number of candidate features considered per node
%   para.treePara       : struct of decision-tree parameters:
%     para.treePara.measure  : split-selection criterion
%       'gain'       : information gain
%       'gain ratio' : information gain ratio
%       'gini'       : Gini index
%     para.treePara.valuetype: feature value type
%       'con' : continuous
%       'dis' : discrete
% forest: cell(treeNum,1); forest{i,1} is a struct with fields:
%   forest{i,1}.trainObj: indices of the samples used to train tree i
%   forest{i,1}.tree    : trained decision-tree model
forest=cell(para.treeNum,1);
for j=1:1:para.treeNum
    % Bootstrap-sample a training bag, then fit one tree on it.
    [trainX,trainLable,choosedObj]=bagging(X,label,para.bagsize);
    forest{j,1}.tree= DecisionTree_train( trainX,trainLable,para.treePara,para.nodeInputFeaNum);
    forest{j,1}.trainObj=choosedObj;
end
%-------internal helper functions----------------------------------{%
    function [bagX,bagLabel,choosedobj]=bagging(X,label,bagsize)
    %BAGGING Draw ceil(n*bagsize) samples uniformly at random WITH replacement.
    % Returns the sampled rows, their labels, and the chosen row indices.
    [n,d]=size(X);
    objNum=ceil(n*bagsize);
    bagX=zeros(objNum,d);
    bagLabel=zeros(objNum,1);
    choosedobj=zeros(objNum,1);
    for i=1:1:objNum
        id=randi(n);            % uniform random index in 1..n (was floor(rand()*n+1))
        bagX(i,:)=X(id,:);
        bagLabel(i)=label(id);
        choosedobj(i)=id;
    end
    end
    function [project_bagX,choosedFeature]=randomProject(bagX,chooseFeature_ratio)
    %RANDOMPROJECT Keep a random subset of ceil(D*chooseFeature_ratio) feature columns.
    % NOTE(review): not called by the training loop above; per-node feature
    % sampling appears to be delegated to DecisionTree_train via
    % para.nodeInputFeaNum — confirm before removing.
    [~,D]=size(bagX);
    d=ceil(D*chooseFeature_ratio);
    % BUG FIX: the logical mask must span all D features with exactly d of
    % them set true at random. The original allocated a d-long mask and set
    % every entry true, so it always selected features 1..d deterministically
    % (and indexed bagX with a too-short mask).
    perm=randperm(D);
    choosedFeature=false(D,1);
    choosedFeature(perm(1:d))=true;
    project_bagX=bagX(:,choosedFeature);
    end
%-------internal helper functions----------------------------------}%
end
% --- Non-code artifact: scraped download-page metadata, commented out so the
% --- file remains valid MATLAB. Original text preserved below:
% RandomForset.rar_Random Forest_Randomforset_随机森林 预测_随机预测_预测
% 版权申诉
% 5星 · 超过95%的资源 180 浏览量
% 2022-07-15
% 07:26:26
% 上传
% 评论 1
% 收藏 2KB RAR 举报
% 钱亚锋
% - 粉丝: 86
% - 资源: 1万+