% Mixture-of-Gaussian density estimation with the Expectation Maximization algorithm.
%
% Inputs:
% - x: a DxN matrix of N data points in a D-dimensional space
% - p: a vector of K initial mixing probabilities, or, if a scalar, the
% value of K, the number of desired mixture components. In the latter
% - m: a DxK matrix with K initial values for the mixture means
% - sigma: a vector of K initial values for the mixture standard deviations
% - tol: a tolerance value; if the relative change of p, m, sigma from one iteration
% to the next does not exceed tol, convergence is declared
% - maxiter: maximum number of iterations
%
% Note: all parameters other than x and p are optional. Default values:
% - m, sigma: initialized by initClusters(x)
% - tol: 1.0e-6 times the standard deviation of the input point coordinates
% - maxiter: 100
%
% Sample input argument lists:
% - em(x, K)
% - em(x, K, [], sigma, [], maxiter)
% - em(x, [], m)
% - em(x, p, m, sigma)
% - em(x, p, m, sigma, tol, maxiter)
%
% Outputs:
% - p: final mixing probabilites
% - m: final mixture means
% - sigma: final mixture standard deviations
% - pkn: a K x N matrix where pkn(k, n) is the probability that data point n belongs to model k
% - niter: number of iterations to convergence. If negative, no convergence occurred
% within maxiter iterations. In that case, niter is set to -maxiter.
function [p, m, sigma, pkn, niter] = em(x, p, m, sigma, tol, maxiter)
if nargin < 2
error('Must provide at least a matrix of data points and a desired number of mixture components')
end
if nargin < 3
m = [];
end
if nargin < 4
sigma = [];
end
if nargin < 5
tol = [];
end
if nargin < 6
maxiter = [];
end
if isempty(tol)
tol = std(x(:)) * 1.0e-6;
end
if isempty(maxiter)
maxiter = 100;
end
% Number of mixture components
if max(size(p)) == 1
% p is really K
K = p;
% Initial mixing probabilities
p = 1/K * ones(1, K);
else
K = length(p);
end
% Provide default initializers as needed
if isempty(p) | isempty(m) | isempty(sigma)
[m0, sigma0, p0] = initClusters(x, K);
if isempty(m)
m = m0;
end
if isempty(sigma)
sigma = sigma0;
end
if isempty(p)
p = p0;
end
end
[N, D, K] = checkSizes(x, p, m, sigma);
% Vectors useful for packaging
oD = ones(D, 1);
oN = ones(1, N);
niter = 0;
while 1
% Remember old values for convergence check
oldp = p;
oldm = m;
oldsigma = sigma;
% 'E' step: compute membership probabilities
for k = 1:K
q(k, :) = p(k) * gauss(x, m(:, k), sigma(k));
end
pkn = q ./ (ones(K, 1) * colsum(q));
% 'M' step: compute mixture component parameters
s = colsum(pkn');
p = s / sum(s);
for k = 1:K
m(:, k) = colsum(((oD * pkn(k, :)) .* x / s(k))')';
sigma(k) = sqrt(sum(colsum((x - m(:, k) * oN) .^ 2) .* pkn(k, :)) / s(k) / D);
end
% Perfect fit. Stop to avoid degeneracies
if max(sigma) == 0
break;
end
% Check for convergence
if converged(m, oldm, tol) & converged(sigma, oldsigma, tol) & converged(p, oldp, tol)
break;
end
% Too many iterations?
niter = niter + 1;
if niter >= maxiter
disp(sprintf('Warning: no convergence within %d iterations', maxiter))
niter = -maxiter;
break;
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function answer = converged(new, old, tolerance)
d = new - old;
% Greatest absolute norm change
delta = max(sqrt(sum(d .^ 2)));
% Mean size
s = mean(sqrt(sum(new .^ 2)));
% Is the relative change small enough?
answer = (delta <= s * tolerance);