%**************************************************************
%* mex interface to Andy Liaw et al.'s C code (used in R package randomForest)
%* Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu )
%* License: GPLv2
%* Version: 0.02
%
% Calls Classification Random Forest
% A wrapper matlab file that calls the mex file
% This does training given the data and labels
% Documentation copied from R-packages pdf
% http://cran.r-project.org/web/packages/randomForest/randomForest.pdf
% Tutorial on getting this working in tutorial_ClassRF.m
%**************************************************************
% function model = classRF_train(X,Y,ntree,mtry, extra_options)
%
% ___Options
% requires 2 arguments and the rest 3 are optional
% X: data matrix
% Y: target values
% ntree (optional): number of trees (default is 500). also if set to 0
% will default to 500
% mtry (default is floor(sqrt(size(X,2))) D=number of features in X). also if set to 0
%       will default to floor(sqrt(D))
%
%
% Note: TRUE = 1 and FALSE = 0 below
% extra_options represent a structure containing various misc. options to
% control the RF
% extra_options.replace = 0 or 1 (default is 1) sampling with or without
% replacement
% extra_options.classwt = priors of classes. Here the function first gets
% the labels in ascending order and assumes the
% priors are given in the same order. So if the class
% labels are [-1 1 2] and classwt is [0.1 2 3] then
% there is a 1-1 correspondence. (ascending order of
%                       class labels). Once this is set, the frequency of the labels in
%                       the training data also affects the effective weighting.
% extra_options.cutoff (Classification only) = A vector of length equal to number of classes. The 'winning'
% class for an observation is the one with the maximum ratio of proportion
% of votes to cutoff. Default is 1/k where k is the number of classes (i.e., majority
% vote wins).
% extra_options.strata = (not yet stable in code) variable that is used for stratified
% sampling. I don't yet know how this works. Disabled
% by default
% extra_options.sampsize = Size(s) of sample to draw. For classification,
% if sampsize is a vector of the length the number of strata, then sampling is stratified by strata,
% and the elements of sampsize indicate the numbers to be
% drawn from the strata.
% extra_options.nodesize = Minimum size of terminal nodes. Setting this number larger causes smaller trees
% to be grown (and thus take less time). Note that the default values are different
% for classification (1) and regression (5).
% extra_options.importance = Should importance of predictors be assessed?
% extra_options.localImp = Should casewise importance measure be computed? (Setting this to TRUE will
% override importance.)
% extra_options.proximity = Should proximity measure among the rows be calculated?
% extra_options.oob_prox = Should proximity be calculated only on 'out-of-bag' data?
% extra_options.do_trace = If set to TRUE, give a more verbose output as randomForest is run. If set to
% some integer, then running output is printed for every
% do_trace trees.
% extra_options.keep_inbag Should an n by ntree matrix be returned that keeps track of which samples are
% 'in-bag' in which trees (but not how many times, if sampling with replacement)
%
% Options eliminated
% corr_bias which happens only for regression omitted
% norm_votes - always set to return total votes for each class.
%
% ___Returns model which has
% importance = a matrix with nclass + 2 (for classification) or two (for regression) columns.
% For classification, the first nclass columns are the class-specific measures
% computed as mean decrease in accuracy. The nclass + 1st column is the
% mean decrease in accuracy over all classes. The last column is the mean decrease
% in Gini index. For Regression, the first column is the mean decrease in
% accuracy and the second the mean decrease in MSE. If importance=FALSE,
% the last measure is still returned as a vector.
% importanceSD = The 'standard errors' of the permutation-based importance measure. For classification,
% a p by nclass + 1 matrix corresponding to the first nclass + 1
% columns of the importance matrix. For regression, a length p vector.
% localImp = a p by n matrix containing the casewise importance measures, the [i,j] element
% of which is the importance of i-th variable on the j-th case. NULL if
% localImp=FALSE.
% ntree = number of trees grown.
% mtry = number of predictors sampled for splitting at each node.
% votes (classification only) a matrix with one row for each input data point and one
% column for each class, giving the fraction or number of 'votes' from the random
% forest.
% oob_times number of times cases are 'out-of-bag' (and thus used in computing OOB error
% estimate)
% proximity if proximity=TRUE when randomForest is called, a matrix of proximity
% measures among the input (based on the frequency that pairs of data points are
% in the same terminal nodes).
% errtr = first column is OOB Err rate, second is for class 1 and so on
function model = classRF_train(p_train, t_train, ntree, mtry, extra_options)
% Trains a classification random forest (wrapper around the mexClassRF_train
% mex interface); see the header comments above for full option/return docs.
%
% NOTE(review): this copy of the function is TRUNCATED -- only the label
% remapping and option-unpacking prologue is visible here. The remaining
% default handling, sampsize/classwt/cutoff setup, and the actual mex call
% are missing and must be restored from the original randomForest-matlab
% package before this file is usable.
DEFAULTS_ON = 0;
% Local boolean aliases used when assigning option defaults below.
TRUE = 1;
FALSE = 0;
DEBUG_ON = 0;
% Remap arbitrary class labels (e.g. [-1 1 2]) onto consecutive integers
% 1..K, as the underlying C code expects. Labels are taken in ascending
% order, matching the ordering assumed for extra_options.classwt.
% (The original label set is kept in orig_labels -- presumably stored in
% the returned model later so predictions can be mapped back; TODO confirm
% in the missing part of the body.)
orig_labels = sort(unique(t_train));
Y_new = t_train;
new_labels = 1 : length(orig_labels);
for i = 1:length(orig_labels)
% Mark the positions of the i-th original label with an Inf sentinel,
% then overwrite those positions with the new integer label. Matching
% is done against the unmodified t_train, so labels already rewritten
% in Y_new are never re-matched.
Y_new(t_train == orig_labels(i)) = Inf;
Y_new(isinf(Y_new)) = new_labels(i);
end
t_train = Y_new;
% Unpack whichever options the caller supplied; anything absent falls
% through to the defaults assigned by the ~exist checks further down.
if exist('extra_options', 'var')
if isfield(extra_options, 'DEBUG_ON'); DEBUG_ON = extra_options.DEBUG_ON; end
if isfield(extra_options, 'replace'); replace = extra_options.replace; end
if isfield(extra_options, 'classwt'); classwt = extra_options.classwt; end
if isfield(extra_options, 'cutoff'); cutoff = extra_options.cutoff; end
if isfield(extra_options, 'strata'); strata = extra_options.strata; end
if isfield(extra_options, 'sampsize'); sampsize = extra_options.sampsize; end
if isfield(extra_options, 'nodesize'); nodesize = extra_options.nodesize; end
if isfield(extra_options, 'importance'); importance = extra_options.importance; end
if isfield(extra_options, 'localImp'); localImp = extra_options.localImp; end
if isfield(extra_options, 'nPerm'); nPerm = extra_options.nPerm; end
if isfield(extra_options, 'proximity'); proximity = extra_options.proximity; end
if isfield(extra_options, 'oob_prox'); oob_prox = extra_options.oob_prox; end
% if isfield(extra_options, 'norm_votes'); norm_votes = extra_options.norm_votes; end
if isfield(extra_options, 'do_trace'); do_trace = extra_options.do_trace; end
% if isfield(extra_options, 'corr_bias'); corr_bias = extra_options.corr_bias; end
if isfield(extra_options, 'keep_inbag'); keep_inbag = extra_options.keep_inbag; end
end
keep_forest = 1; % always save the trees :)
% set defaults if not already set
% NOTE(review): DEBUG_ON is unconditionally initialized to 0 above, so this
% exist-check never fires; the extra_options branch is what actually sets it.
if ~exist('DEBUG_ON', 'var'); DEBUG_ON = FALSE; end
if ~exist('replace', 'var'); replace = TRUE; end
% if ~exist('classwt', 'var'); classwt = []; end % will handle these three later
% if ~exist('cutoff', 'var'); cutoff = 1; end
没有合适的资源?快使用搜索试试~ 我知道了~
遗传算法(GA)优化随机森林(RF)的分类预测,GA-RF分类预测模型,多输入单输出模型 多特征输入单输出的二分类及多分类模
共14个文件
m:11个
mexw64:2个
xlsx:1个
需积分: 0 8 下载量 180 浏览量
2023-09-21
11:48:28
上传
评论
收藏 115KB ZIP 举报
温馨提示
遗传算法(GA)优化随机森林(RF)的分类预测,GA-RF分类预测模型,多输入单输出模型。 多特征输入单输出的二分类及多分类模型。程序内注释详细,直接替换数据就可以用。 程序语言为matlab,程序可出分类效果图,迭代优化图,混淆矩阵图。
资源推荐
资源详情
资源评论
收起资源包目录
0.2 GA_RF分类 - 2.zip (14个子文件)
classRF_train.m 15KB
GA.m 2KB
initialization.m 584B
main.m 1KB
数据集.xlsx 73KB
classRF_predict.m 2KB
mexClassRF_predict.mexw64 26KB
Code.m 386B
mexClassRF_train.mexw64 43KB
Cross.m 1KB
test.m 554B
fun.m 696B
Select2.m 889B
Mutation.m 1KB
共 14 条
- 1
资源评论
智能算法及其模型预测
- 粉丝: 2382
- 资源: 871
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
- 提取文字_20241116.docx
- 使用 PyTorch /TensorFlow实现 ZFNet 进行 MNIST 图像分类
- Python课程设计之俄罗斯方块
- C#仓库条码管理系统源码数据库 SQL2008源码类型 WinForm
- 网安面试题合集-来自网络收集.zip
- (2024最新整理)42万+世界各国新冠疫情数据(2020.1-2024.8)
- ESP32开发板(CH340驱动芯片) Type-C口+数据线+0.96显示屏+杜邦线 链接wifi,显示当前时间
- windows hex查看工具
- C语言编程基础及其应用详解:从入门到实战练习.pdf
- C#CS小型医院HIS门诊挂号收费管理系统源码数据库 SQL2008源码类型 WinForm
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功