function [svm_struct, svIndex] = svmtrain(training, groupnames, varargin)
%SVMTRAIN trains a support vector machine classifier
%
% SVMStruct = SVMTRAIN(TRAINING,GROUP) trains a support vector machine
% classifier using data TRAINING taken from two groups given by GROUP.
% SVMStruct contains information about the trained classifier, including
% the support vectors, that is used by SVMCLASSIFY for classification.
% GROUP is a column vector of values of the same length as TRAINING that
% defines two groups. Each element of GROUP specifies the group the
% corresponding row of TRAINING belongs to. GROUP can be a numeric
% vector, a string array, or a cell array of strings. SVMTRAIN treats
% NaNs or empty strings in GROUP as missing values and ignores the
% corresponding rows of TRAINING.
%
% SVMTRAIN(...,'KERNEL_FUNCTION',KFUN) allows you to specify the kernel
% function KFUN used to map the training data into kernel space. The
% default kernel function is the dot product. KFUN can be one of the
% following strings or a function handle:
%
% 'linear' Linear kernel or dot product
% 'quadratic' Quadratic kernel
% 'polynomial' Polynomial kernel (default order 3)
% 'rbf' Gaussian Radial Basis Function kernel
% 'mlp' Multilayer Perceptron kernel (default scale 1)
% function A kernel function specified using @,
% for example @KFUN, or an anonymous function
%
% A kernel function must be of the form
%
% function K = KFUN(U, V)
%
% The returned value, K, is a matrix of size M-by-N, where U and V have M
% and N rows respectively. If KFUN is parameterized, you can use
% anonymous functions to capture the problem-dependent parameters. For
% example, suppose that your kernel function is
%
% function k = kfun(u,v,p1,p2)
% k = tanh(p1*(u*v')+p2);
%
% You can set values for p1 and p2 and then use an anonymous function:
% @(u,v) kfun(u,v,p1,p2).
%
% SVMTRAIN(...,'RBF_SIGMA',SIGMA) allows you to specify the scaling
% factor, sigma, in the radial basis function kernel.
%
% SVMTRAIN(...,'POLYORDER',ORDER) allows you to specify the order of a
% polynomial kernel. The default order is 3.
%
% SVMTRAIN(...,'MLP_PARAMS',[P1 P2]) allows you to specify the
% parameters of the Multilayer Perceptron (mlp) kernel. The mlp kernel
% requires two parameters, P1 and P2, where K = tanh(P1*U*V' + P2) and P1
% > 0 and P2 < 0. Default values are P1 = 1 and P2 = -1.
%
% SVMTRAIN(...,'METHOD',METHOD) allows you to specify the method used
% to find the separating hyperplane. Options are
%
% 'QP' Use quadratic programming (requires the Optimization Toolbox)
% 'LS' Use least-squares method
%
% If you have the Optimization Toolbox, then the QP method is the default
% method. If not, the only available method is LS. When using the QP
% method, the classifier is a 2-norm soft-margin support vector machine.
%
% SVMTRAIN(...,'QUADPROG_OPTS',OPTIONS) allows you to pass an OPTIONS
% structure created using OPTIMSET to the QUADPROG function when using
% the 'QP' method. See help optimset for more details.
%
% SVMTRAIN(...,'BOXCONSTRAINT',C) allows you to set the box constraint C
% for the soft margin. The default value is 1 / sqrt(eps), which will
% only classify separable data. C can be a scalar or a vector of the same
% length as the training data.
%
% SVMTRAIN(...,'AUTOSCALE',true) will automatically shift and scale the
% data points before training.
%
% SVMTRAIN(...,'SHOWPLOT',true), when used with two-dimensional data,
% creates a plot of the grouped data and plots the separating line for
% the classifier.
%
% Example:
% % Load the data and select features for classification
% load fisheriris
% data = [meas(:,1), meas(:,2)];
% % Extract the Setosa class
% groups = ismember(species,'setosa');
% % Randomly select training and test sets
% [train, test] = crossvalind('holdOut',groups);
% cp = classperf(groups);
% % Use a linear support vector machine classifier
% svmStruct = svmtrain(data(train,:),groups(train),'showplot',true);
% classes = svmclassify(svmStruct,data(test,:),'showplot',true);
% % See how well the classifier performed
% classperf(cp,classes,test);
% cp.CorrectRate
%
% See also CLASSIFY, KNNCLASSIFY, QUADPROG, SVMCLASSIFY.
% Copyright 2004-2006 The MathWorks, Inc.
% $Revision: 1.1.12.4 $ $Date: 2006/05/17 20:49:01 $
% References:
%
% [1] Cristianini, N., Shawe-Taylor, J., An Introduction to Support
% Vector Machines, Cambridge University Press, Cambridge, UK. 2000.
% http://www.support-vector.net
% [2] Kecman, V, Learning and Soft Computing,
% MIT Press, Cambridge, MA. 2001.
% [3] Suykens, J.A.K., Van Gestel, T., De Brabanter, J., De Moor, B.,
% Vandewalle, J., Least Squares Support Vector Machines,
% World Scientific, Singapore, 2002.
%
% SVMTRAIN(...,'KFUNARGS',ARGS) allows you to pass additional
% arguments to kernel functions.
% set defaults
plotflag = false;
% The large scale solver cannot handle this type of problem, so turn it
% off.
qp_opts = optimset('LargeScale','Off','display','off');
kfunargs = {};
setPoly = false; usePoly = false;
setMLP = false; useMLP = false;
setSigma = false; useSigma = false;
autoScale = false;
if ~isempty(which('quadprog'))
useQuadprog = true;
else
useQuadprog = false;
end
% set default kernel function
kfun = @linear_kernel;
% check inputs
if nargin < 2
error(nargchk(2,Inf,nargin))
end
numoptargs = nargin -2;
optargs = varargin;
% grp2idx sorts a numeric grouping var ascending, and a string grouping
% var by order of first occurrence
[groupIndex, groupString] = grp2idx(groupnames);
% check that group is a vector; char input is exempt from this check
% because a char array of group names may be a matrix
if ~isvector(groupnames) && ~ischar(groupnames)
error('Bioinfo:svmtrain:GroupNotVector',...
'Group must be a vector.');
end
% make sure that the group variable is oriented as a column.
if size(groupnames,1) == 1
groupnames = groupnames';
end
% make sure data is the right size
if size(training,1) ~= length(groupnames)
if size(training,2) == length(groupnames)
training = training';
else
error('Bioinfo:svmtrain:DataGroupSizeMismatch',...
'GROUP and TRAINING must have the same number of rows.')
end
end
% check for NaN in data matrix:
if any(isnan(training(:)))
error('Bioinfo:svmtrain:NaNinDataMatrix', ...
'TRAINING data must not contain missing values');
end
% NaNs are treated as unknown classes and are removed from the training
% data
nans = find(isnan(groupIndex));
if length(nans) > 0
training(nans,:) = [];
groupIndex(nans) = [];
end
ngroups = length(groupString);
nPoints = length(groupIndex);
% set default value of box constraint
boxconstraint = 1 / sqrt(eps) * ones(1, nPoints);
if ngroups > 2
error('Bioinfo:svmtrain:TooManyGroups',...
'SVMTRAIN only supports classification into two groups.\nGROUP contains %d different groups.',ngroups)
end
% convert group indices to class labels: index 1 -> +1, index 2 -> -1.
groupIndex = 1 - (2* (groupIndex-1));
% handle optional arguments
if numoptargs >= 1
if rem(numoptargs,2)== 1
error('Bioinfo:svmtrain:IncorrectNumberOfArguments',...
'Incorrect number of arguments to %s.',mfilename);
end
okargs = {'kernel_function','method','showplot','kfunargs',...
'quadprog_opts','polyorder','mlp_params',...
'boxconstraint','rbf_sigma','autoscale'};
for j=1:2:numoptargs
pname = optargs{j};
pval = optargs{j+1};
k = strmatch(lower(pname), okargs);%#ok
if isempty(k)
error('Bioinfo:svmtrain:UnknownParameterName',...
'Unknown parameter name: %s.',pname);
elseif length(k)>1
error('Bioinfo:svmtrain:AmbiguousParameterName',...
'Ambig
评论0