function [D, a_star] = SVM(train_features, train_targets, params, region)
% Classify using (a very simple implementation of) the support vector machine algorithm
%
% Inputs:
% features- Train features
% targets - Train targets
% params - [kernel, kernel parameter, solver type, Slack]
% Kernel can be one of: Gauss, RBF (Same as Gauss), Poly, Sigmoid, or Linear
% The kernel parameters are:
% RBF kernel - Gaussian width (One parameter)
% Poly kernel - Polynomial degree
% Sigmoid - The slope and constant of the sigmoid (in the format [1 2], with no separating commas)
% Linear - None needed
% Solver type can be one of: Perceptron, Quadprog
% region - Decision region vector: [-x x -y y number_of_points]
%
% Outputs
% D - Decision sufrace
% a - SVM coeficients
%
% Note: The number of support vectors found will usually be larger than is actually
% needed because both solvers are approximate.
[Dim, Nf] = size(train_features);
Dim = Dim + 1;
train_features(Dim,:) = ones(1,Nf);
z = 2*(train_targets>0) - 1;
%Get kernel parameters
[kernel, ker_param, solver, slack] = process_params(params);
%Transform the input features
y = zeros(Nf);
switch kernel,
case {'Gauss','RBF'},
for i = 1:Nf,
y(:,i) = exp(-sum((train_features-train_features(:,i)*ones(1,Nf)).^2)'/(2*ker_param^2));
end
case {'Poly', 'Linear'}
if strcmp(kernel, 'Linear')
ker_param = 1;
end
for i = 1:Nf,
y(:,i) = (train_features'*train_features(:,i) + 1).^ker_param;
end
case 'Sigmoid'
if (length(ker_param) ~= 2)
error('This kernel needs two parameters to operate!')
end
for i = 1:Nf,
y(:,i) = tanh(train_features'*train_features(:,i)*ker_param(1)+ker_param(2));
end
otherwise
error('Unknown kernel. Can be Gauss, Linear, Poly, or Sigmoid.')
end
%Find the SVM coefficients
switch solver
case 'Quadprog'
%Quadratic programming
if ~isfinite(slack)
alpha_star = quadprog((z'*z).*(y'*y), -ones(1, Nf), [], [], z, 0, 0)';
else
alpha_star = quadprog((z'*z).*(y'*y), -ones(1, Nf), [], [], z, 0, 0, slack)';
end
a_star = (alpha_star.*z)*y';
%Find the bias
in = find((alpha_star > 0) & (alpha_star < slack));
if isempty(in),
bias = 0;
else
B = z(in) - a_star * y(:,in);
bias = mean(B(in));
end
case 'Perceptron'
max_iter = 1e5;
iter = 0;
rate = 0.01;
xi = ones(1,Nf)/Nf*slack;
if ~isfinite(slack),
slack = 0;
end
%Find a start point
processed_y = [y; ones(1,Nf)] .* (ones(Nf+1,1)*z);
a_star = mean(processed_y')';
while ((sum(sign(a_star'*processed_y+xi)~=1)>0) & (iter < max_iter))
iter = iter + 1;
if (iter/5000 == floor(iter/5000)),
disp(['Working on iteration number ' num2str(iter)])
end
%Find the worse classified sample (That farthest from the border)
dist = a_star'*processed_y+xi;
[m, indice] = min(dist);
a_star = a_star + rate*processed_y(:,indice);
%Calculate the new slack vector
xi(indice) = xi(indice) + rate;
xi = xi / sum(xi) * slack;
end
if (iter == max_iter),
disp(['Maximum iteration (' num2str(max_iter) ') reached']);
else
disp(['Converged after ' num2str(iter) ' iterations.'])
end
bias = 0;
a_star = a_star(1:Nf)';
case 'Lagrangian'
%Lagrangian SVM (See Mangasarian & Musicant, Lagrangian Support Vector Machines)
tol = 1e-5;
max_iter = 1e5;
nu = 1/Nf;
iter = 0;
D = diag(z);
alpha = 1.9/nu;
e = ones(Nf,1);
I = speye(Nf);
Q = I/nu + D*y'*D;
P = inv(Q);
u = P*e;
oldu = u + 1;
while ((iter<max_iter) & (sum(sum((oldu-u).^2)) > tol)),
iter = iter + 1;
if (iter/5000 == floor(iter/5000)),
disp(['Working on iteration number ' num2str(iter)])
end
oldu = u;
f = Q*u-1-alpha*u;
u = P*(1+(abs(f)+f)/2);
end
a_star = y*D*u(1:Nf);
bias = -e'*D*u;
otherwise
error('Unknown solver. Can be either Quadprog or Perceptron')
end
%Find support verctors
sv = find(abs(a_star) > 1e-10);
Nsv = length(sv);
if isempty(sv),
error('No support vectors found');
else
disp(['Found ' num2str(Nsv) ' support vectors'])
end
%Margin
b = 1/sqrt(sum(a_star.^2));
disp(['The margin is ' num2str(b)])
%Now build the decision region
N = region(5);
xx = linspace (region(1),region(2),N);
yy = linspace (region(3),region(4),N);
D = zeros(N);
for j = 1:N,
y = zeros(N,1);
for i = 1:Nsv,
data = [xx(j)*ones(1,N); yy; ones(1,N)];
switch kernel,
case {'Gauss','RBF'},
y = y + a_star(i) * exp(-sum((data-train_features(:,sv(i))*ones(1,N)).^2)'/(2*ker_param^2));
case {'Poly', 'Linear'}
y = y + a_star(i) * (data'*train_features(:,sv(i))+1).^ker_param;
case 'Sigmoid'
y = y + a_star(i) * tanh(data'*train_features(:,sv(i))*ker_param(1)+ker_param(2));
end
end
D(:,j) = (y + bias);
end
评论0