function [ALL_INDICES, P_CORRECT] = sequential_floating_forward_selection(DATA, PARAMETERS)
% This function is called from the main function
%SEQUENTIAL_FLOATING_FORWARD_SELECTION - uses the SFFS algorithm out of [1] to determine the best features
%
% [1]: Floating Search Methods for Feature Selection with Nonmonotonic Criterion Functions
% P. Pudil et al., Proc. 12th IAPR International Conference on Pattern Recognition (Conference B), 1994
%
%
% Possible parameters are:
%
% PARAMETERS.SFFS_DIMENSION - number of features that should be selected
% PARAMETERS.CLASSIFIER - contains the classifier that should be used for
% the analysis
% PARAMETERS.TCR - training-to-classification ratio
% if NaN then use all features for training and classification
% else use given ratio
% PARAMETERS.ITERATIONS - number of iterations that are done to get numerically
% stable results.
% PARAMETERS.FEATURE_REDUCTION - 1: Fisher transformation (determinant criterion)
% 2: Fisher transformation (trace criterion)
% 3: PCA
% NaN: no Transformation (Default)
% PARAMETERS.REDUCED_DIMENSION - ignored if PARAMETERS.FEATURE_REDUCTION == NaN
% = 1,... then reduce feature dimension (Default: 3)
% PARAMETERS.CONSTRAIN_DIAGONAL - if '0', no constraint is applied; otherwise
% -1 is added to P_CORRECT if any element of the confusion matrix diagonal
% is smaller than PARAMETERS.CONSTRAIN_DIAGONAL
% PARAMETERS.VERBOSE - 0: Suppress all output
%                      1: Show intermediate results and progress bar (Default)
%                      2: Additionally show status of evaluate_classifier
%
% NOTE: use normalize_matrix to get the same value range for all features
% this is useful to avoid singular covariance matrices
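% e.g., a minimal sketch (assuming normalize_matrix accepts the feature
% columns as a matrix; the last column of DATA.FEATURES is left untouched
% because it carries the class labels, as the indexing below suggests):
% DATA.FEATURES(:,1:end-1) = normalize_matrix(DATA.FEATURES(:,1:end-1));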
%
% NOTE: DATA.FEATURES is used instead of DATA.TRAINING and DATA.CLASSIFICATION
%
% NOTE: It is possible to set additional parameters in PARAMETERS. As PARAMETERS
% is passed on to the classifier, this enables the user to select certain
% options of the classifier, e.g. PARAMETERS.GMM_DIM = 2 will tell
% gmm_classifier to use a constant dimension of two for the mixtures.
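%
% EXAMPLE: a minimal sketch with random data. The classifier name and the
% label layout (class labels in the last column of DATA.FEATURES) are
% assumptions for illustration, not part of the original documentation:
%
% DATA.FEATURES = [randn(200,8) randi(2,200,1)]; % 8 features + class label
% PARAMETERS.SFFS_DIMENSION = 3;                 % select the best 3 features
% PARAMETERS.CLASSIFIER = 'gmm_classifier';      % hypothetical classifier choice
% PARAMETERS.VERBOSE = 0;                        % suppress output
% [ALL_INDICES, P_CORRECT] = sequential_floating_forward_selection(DATA, PARAMETERS);
% ALL_INDICES{3}                                 % indices of the three selected features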
%% 1. INITIALISATION
% if DATA.FEATURES does not exist -> create it
if isfield(DATA,'FEATURES') == 0
DATA.FEATURES = [DATA.TRAINING; DATA.CLASSIFICATION];
end
[TRAINING_LENGTH, CLASSIFICATION_LENGTH, NUM_CLASSES, CLASSES, FEATURE_LENGTH] = get_lengths(DATA, PARAMETERS);
if exist('PARAMETERS','var') == 0 || isfield(PARAMETERS,'SFFS_DIMENSION') == 0
PARAMETERS.SFFS_DIMENSION = min(10, FEATURE_LENGTH);
end
if exist('PARAMETERS','var') == 0 || isfield(PARAMETERS,'CONSTRAIN_DIAGONAL') == 0
PARAMETERS.CONSTRAIN_DIAGONAL = 0;
end
if exist('PARAMETERS','var') == 0 || isfield(PARAMETERS,'VERBOSE') == 0
PARAMETERS.VERBOSE = 1;
end
if PARAMETERS.VERBOSE > 1
PARAMETERS.SHOW_STATUS = 1;
end
if PARAMETERS.SFFS_DIMENSION > FEATURE_LENGTH
warning('Changed SFFS_DIMENSION from %d to %d', PARAMETERS.SFFS_DIMENSION, FEATURE_LENGTH);
PARAMETERS.SFFS_DIMENSION = FEATURE_LENGTH;
end
%% SEQUENTIAL FLOATING FORWARD SELECTION - ALGORITHM
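% The loop below alternates two steps until SFFS_DIMENSION features are selected:
% 1. inclusion: add the not-yet-selected feature that maximises P_CORRECT
% 2. conditional exclusion: while removing one of the selected features yields
%    a better P_CORRECT than the best value recorded so far for the resulting
%    (smaller) dimension, remove that feature and update P_CORRECT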
% This Vector contains all indices that are currently selected
INDICES = [];
P_CORRECT = zeros(1, PARAMETERS.SFFS_DIMENSION);
% at the beginning, sequential forward selection is applied twice to fill INDICES with two features
[INDEX, P_CORRECT(length(INDICES) + 1)] = get_most_significant_feature();
INDICES = [INDICES INDEX];
ALL_INDICES{length(INDICES)} = INDICES;
if PARAMETERS.VERBOSE > 0
print_status();
end
[INDEX, P_CORRECT(length(INDICES) + 1)] = get_most_significant_feature();
INDICES = [INDICES INDEX];
ALL_INDICES{length(INDICES)} = INDICES;
while length(INDICES) < PARAMETERS.SFFS_DIMENSION
if PARAMETERS.VERBOSE > 0
print_status();
end
% 1. step: inclusion
[INDEX, P_CORRECT(length(INDICES) + 1)] = get_most_significant_feature();
INDICES = [INDICES INDEX];
ALL_INDICES{length(INDICES)} = INDICES;
% 2. step: exclusion
[INDEX, P_CORRECT_EXCLUSION] = get_least_significant_feature();
while (P_CORRECT_EXCLUSION > P_CORRECT(length(INDICES) - 1) && length(INDICES) > 2) % only do the backward step if the obtained P_CORRECT is better than before at this step
if PARAMETERS.VERBOSE > 0
fprintf('Deleting %d\n', INDICES(INDEX));
end
% delete INDEX
INDICES(INDEX) = [];
ALL_INDICES{length(INDICES)} = INDICES;
% Update P_CORRECT
P_CORRECT(length(INDICES)) = P_CORRECT_EXCLUSION;
% determine next feature to exclude
[INDEX, P_CORRECT_EXCLUSION] = get_least_significant_feature();
end
end
if PARAMETERS.VERBOSE > 0
fprintf('Result for dimension %d: P_CORRECT = %f\n',length(INDICES),P_CORRECT(length(INDICES)))
fprintf('Selected features: %d\n', INDICES)
[VAL,IND] = max(P_CORRECT);
if IND ~= length(INDICES)
warning('Best result for dimension %d with P_CORRECT %f and not for dimension %d with P_CORRECT %f \n\n',IND,VAL,length(INDICES),P_CORRECT(end));
end
end
%% NESTED FUNCTION: get_most_significant_feature
function [INDEX, P_CORRECT] = get_most_significant_feature()
% this function obtains the most significant feature among all
% features that are currently not selected
EVAL_MATRIX = zeros(FEATURE_LENGTH,1);
if PARAMETERS.VERBOSE > 0
h = waitbar(0,'Please wait');
end
for INDEX_C = 1:FEATURE_LENGTH
if PARAMETERS.VERBOSE > 0
PR = INDEX_C/FEATURE_LENGTH;
waitbar(PR, h)
end
if ~any(INDICES == INDEX_C)
TMP_DATA.FEATURES = DATA.FEATURES(:,[INDICES INDEX_C end]);
TMP = evaluate_classifier(TMP_DATA, PARAMETERS);
EVAL_MATRIX(INDEX_C) = sum(TMP(:,1) == TMP(:,2))/length(TMP(:,1));
if PARAMETERS.CONSTRAIN_DIAGONAL ~= 0
if all(isnan(TMP) == 0)
CONF_MATRIX_DIAGONAL = diag(generate_confusion_matrix(TMP));
if any(CONF_MATRIX_DIAGONAL(2:end) < PARAMETERS.CONSTRAIN_DIAGONAL)
EVAL_MATRIX(INDEX_C) = EVAL_MATRIX(INDEX_C) - 1;
end
else
EVAL_MATRIX(INDEX_C) = EVAL_MATRIX(INDEX_C) - 1;
end
end
else
EVAL_MATRIX(INDEX_C) = -inf; % so this feature can never be selected
end
end
if PARAMETERS.VERBOSE > 0
close(h)
end
[P_CORRECT, INDEX] = max(EVAL_MATRIX);
end
%% NESTED FUNCTION: get_least_significant_feature
function [INDEX, P_CORRECT] = get_least_significant_feature()
% this function obtains the least significant feature among all
% features that are currently selected
EVAL_MATRIX = zeros(length(INDICES), 1);
for INDEX_C = 1:length(INDICES)
TMP_INDICES = INDICES;
TMP_INDICES(INDEX_C) = [];
TMP_DATA.FEATURES = DATA.FEATURES(:,[TMP_INDICES end]);
TMP = evaluate_classifier(TMP_DATA, PARAMETERS);
EVAL_MATRIX(INDEX_C) = sum(TMP(:,1) == TMP(:,2))/length(TMP(:,1));
if PARAMETERS.CONSTRAIN_DIAGONAL ~= 0
if all(isnan(TMP) == 0)
CONF_MATRIX_DIAGONAL = diag(generate_confusion_matrix(TMP));
if any(CONF_MATRIX_DIAGONAL(2:end) < PARAMETERS.CONSTRAIN_DIAGONAL)
EVAL_MATRIX(INDEX_C) = EVAL_MATRIX(INDEX_C) - 1;
end
else
EVAL_MATRIX(INDEX_C) = EVAL_MATRIX(INDEX_C) - 1;
end
end
end
[P_CORRECT, INDEX] = max(EVAL_MATRIX);
end
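%% NESTED FUNCTION: print_status
function print_status()
% prints the intermediate result for the current dimension
% NOTE: the original definition was cut off; this is a minimal sketch that
% mirrors the result output of the final VERBOSE block above
fprintf('Result for dimension %d: P_CORRECT = %f\n', length(INDICES), P_CORRECT(length(INDICES)))
fprintf('Selected features: %d\n', INDICES)
end
end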