function [a, b, g, inds, inde, indw] = svcm_train(x, y, C);
% function [a, b, g, inds, inde, indw] = svcm_train(x, y, C);
% support vector classification machine
% incremental learning, and leave-one-out cross-validation
% soft margin
% uses "kernel.m"
%
% x: independent variable, (L,N) with L: number of points; N: dimension
% y: dependent variable, (L,1) containing class labels (-1 or +1)
% C: soft-margin regularization constant
%
% a: alpha coefficients (to be multiplied by y)
% b: offset coefficient
% g: derivatives (adding one yields margins for each point)
% inds: indices of support vectors
% inde: indices of error vectors
% indw: indices of wrongly classified leave-one-out vectors
%%%%%%%%%% version 1.1; last revised 02/12/2001; send comments to gert@jhu.edu %%%%%%%%%%
%%% GLOBAL VARIABLES:
% Behavior flags; each defaults to 0 here only when unset by the caller,
% so a caller can pre-set any of them via "global" before invoking.
global online query terse verbose debug memoryhog visualize
if isempty(online)
online = 0; % take data in the order it is presented
end
if isempty(query)
query = 0; % choose next point based on margin distribution
end
if isempty(terse)
terse = 0; % print out only final results
end
if isempty(verbose)
verbose = 0; % print out details of intermediate results
end
if isempty(debug)
debug = 0; % use only for debugging; slows it down significantly
end
if isempty(memoryhog)
memoryhog = 0; % use more memory; good only if kernel dominates computation
end
if isempty(visualize)
visualize = 0; % record and plot trajectory of coeffs. a, g, and leave-one-out g
end
%%% END GLOBAL VARIABLES
% --- argument validation: x and y must agree in length, y must be a
% single column of {-1,+1} labels; on failure, print and return early
% (outputs a, b, g, inds, inde, indw are then undefined).
[L,N] = size(x);
[Ly,Ny] = size(y);
if Ly~=L
fprintf('svcm_train error: x and y different number of data points (%g/%g)\n\n', L, Ly);
return
elseif Ny~=1
fprintf('svcm_train error: y not a single variable (%g)\n\n', Ny);
return
elseif any(y~=-1&y~=1)
fprintf('svcm_train error: y takes values different from {-1,+1}\n\n');
return
end
% NOTE(review): this local "eps" shadows MATLAB's built-in eps for the
% remainder of the function.
eps = 1e-6; % margin "margin"; for numerical stability when Q is positive semi-definite
eps2 = 2*eps/C; % regularization added to the kernel diagonal (see Qc below)
tol = 1e-6; % tolerance on derivatives at convergence, and their recursive computation
fprintf('Support vector soft-margin classifier with incremental learning\n')
fprintf(' %g training points\n', L)
fprintf(' %g dimensions\n\n', N)
keepe = debug|memoryhog; % store Qe for error vectors as "kernel cache"
keepr = debug|memoryhog; % store Qr for recycled support vectors as "kernel cache"
if verbose
terse = 0; % verbose overrides terse
if debug
fprintf('debugging mode (slower, more memory intensive)\n\n')
elseif memoryhog
fprintf('memoryhog active (fewer kernel evaluations, more memory intensive)\n\n')
end
fprintf('kernel used:\n')
help kernel
fprintf('\n')
end
% --- state initialization: all points start as "other" (non-support,
% non-error) vectors, with zero coefficients.
a = zeros(L,1); % coefficients, sparse
b = 0; % offset
W = 0; % energy function
g = -(1+eps)*ones(L,1); % derivative of energy function
inds = []; % indices of support vectors; none initially
inde = []; % indices of error vectors; none initially
indo = (L:-1:1)'; % indices of other vectors; all initially (reversed so stack pops point 1 first)
indr = []; % indices of "recycled" other vectors; for memory "caching"
indl = []; % indices of leave-one-out vectors (still to be) considered
indw = []; % indices of wrongly classified leave-one-out vectors
ls = length(inds); % number of support vectors;
le = length(inde); % number of error vectors;
la = ls+le; % both
lo = length(indo); % number of other vectors;
lr = length(indr); % number of recycled vectors
lw = length(indw); % number of wrongly classified leave-one-out vectors
processed = zeros(L,1); % keeps track of which points have been processed
R = Inf; % inverse hessian (a(inds) and b only)
Qs = y'; % extended hessian; (a(inds) plus b, and all vectors)
Qe = []; % same, for inde ("cache" for Qs)
Qr = []; % same, for indr ("cache" for Qs)
Qc = []; % same, for indc (used for gamma and Qs)
if visualize % for visualization
figure(1)
hold off
clf
axis([-0.1*C, 1.1*C, -1.2, 0.2])
gctraj = [];
figure(2)
hold off
clf
figure(3)
hold off
clf
end
iter = 0; % iteration count
memcount = 0; % memory usage
kernelcount = 0; % kernel computations, counted one "row" (epoch) at a time
training = 1; % first do training recursion ...
leaveoneout = 0; % ... then do leave-one-out sequence (with retraining)
indc = 0; % candidate vector
indco = 0; % leave-one-out vector
indso = 0; % a recycled support vector; used as buffer
free = a(indo)>0|g(indo)<0; % free, candidate support or error vector
left = indo(free); % candidates left
continued = any(left);
% --- main incremental loop: pick a candidate point, fetch (or compute)
% its hessian row, then move its coefficient subject to KKT constraints.
while continued % check for remaining free points or leave-one-outs to process
% select candidate indc
indc_prev = indc;
if online & indc_prev>0
if query
processed(indc_prev) = 1; % record last point in the history log
else
processed(1:indc_prev) = 1; % record last and all preceding points
end
end
if query
% [gindc, indc] = max(g(left)); % closest to the margin
[gindc, indc] = min(g(left)); % greedy; worst margin
indc = left(indc);
else
indc = left(length(left)); % take top of the stack, "last-in, first-out"
end
% get Qc, row of hessian corresponding to indc (needed for gamma);
% caches are searched in order: recycled, previous-iteration leftover,
% support vectors, stored error vectors; only then recompute the kernel row.
if keepr & lr>0 & ... % check for match among recycled vectors
any(find(indr==indc))
ir = find(indr==indc); % found, reuse
Qc = Qr(ir,:);
indr = indr([1:ir-1,ir+1:lr]); % ... remove from indr
Qr = Qr([1:ir-1,ir+1:lr],:); % ... and Qr
lr = lr-1;
elseif indc==indso % support vector from previous iteration, leftover in memory
Qc = Qso;
elseif ls>0 & ... % check for match among support vectors
any(find(inds==indc))
is = find(inds==indc); % found, reuse
Qc = Qs(is+1,:); % +1 skips the offset (b) row of Qs
elseif keepe & le>0 & ... % check for match among stored error vectors
any(find(inde==indc))
ie = find(inde==indc); % found, reuse
Qc = Qe(ie,:);
elseif indc~=indc_prev % not (or no longer) available, compute
xc = x(indc,:);
yc = y(indc);
Qc = (yc*y').*kernel(xc,x); % label-signed kernel row
Qc(indc) = Qc(indc)+eps2; % diagonal regularization for stability
kernelcount = kernelcount+1;
end
% prepare to increment/decrement z = a(indc)' or y(indc)*b, subject to constraints.
% move z up when adding indc ((re-)training), down when removing indc (leave-one-out or g>0)
upc = ~leaveoneout & (g(indc)<=0);
polc = 2*upc-1; % polarity of increment in z
beta = -R*Qs(:,indc); % change in [b;a(inds)] per change in a(indc)
if ls>0
% move z = a(indc)'
gamma = Qc'+Qs'*beta; % change in g(:) per change in z = a(indc)'
z0 = a(indc); % initial z value
zlim = max(0,C*polc); % constraint on a(indc): [0,C] box, direction polc
else % ls==0
% move z = y(indc)*b and keep a(indc) constant; there is no a(:) free to move in inds!
gamma = y(indc)