clc;
clear;
close all;
warning off;
addpath(genpath(pwd));
%%======================================================
%% STEP 1a: Generate data from two 2D distributions.
mu1 = [1 2];        % Mean
sigma1 = [3 .2;     % Covariance matrix
          .2 2];
m1 = 200;           % Number of data points.
mu2 = [-1 -2];
sigma2 = [2 0;
          0 1];
m2 = 100;
% Generate sample points with the specified means and covariance matrices.
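% If R = chol(Sigma) is the upper-triangular Cholesky factor (Sigma = R'*R),
% then Z*R for standard-normal rows Z has covariance R'*R = Sigma, so adding
% the mean yields samples from N(mu, Sigma).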
R1 = chol(sigma1);
X1 = randn(m1, 2) * R1;
X1 = X1 + repmat(mu1, size(X1, 1), 1);
R2 = chol(sigma2);
X2 = randn(m2, 2) * R2;
X2 = X2 + repmat(mu2, size(X2, 1), 1);
X = [X1; X2];
%%=====================================================
%% STEP 1b: Plot the data points and their pdfs.
figure(1);
% Display a scatter plot of the two distributions.
hold off;
plot(X1(:, 1), X1(:, 2), 'bo');
hold on;
plot(X2(:, 1), X2(:, 2), 'ro');
set(gcf,'color','white') % White background for the figure.
% First, create a [10,000 x 2] matrix 'gridX' of coordinates representing
% the input values over the grid.
gridSize = 100;
u = linspace(-6, 6, gridSize);
[A, B] = meshgrid(u, u);
gridX = [A(:), B(:)];
% Calculate the Gaussian response for every value in the grid.
z1 = gaussianND(gridX, mu1, sigma1);
z2 = gaussianND(gridX, mu2, sigma2);
% Reshape the responses back into a 2D grid to be plotted with contour.
Z1 = reshape(z1, gridSize, gridSize);
Z2 = reshape(z2, gridSize, gridSize);
% Plot the contour lines to show the pdf over the data.
[C, h] = contour(u, u, Z1);
[C, h] = contour(u, u, Z2);
axis([-6 6 -6 6])
title('Original Data and PDFs');
%set(h,'ShowText','on','TextStep',get(h,'LevelStep')*2);
%%====================================================
%% STEP 2: Choose initial values for the parameters.
% Set 'm' to the number of data points.
m = size(X, 1);
k = 2; % The number of clusters.
n = 2; % The vector lengths.
% Randomly select k data points to serve as the initial means.
indices = randperm(m);
mu = X(indices(1:k), :);
sigma = cell(1, k);
% Use the overall covariance of the dataset as the initial covariance for each cluster.
for j = 1 : k
    sigma{j} = cov(X);
end
% Assign equal prior probabilities to each cluster.
phi = ones(1, k) * (1 / k);
%%===================================================
%% STEP 3: Run Expectation Maximization
% Matrix to hold the probability that each data point belongs to each cluster.
% One row per data point, one column per cluster.
W = zeros(m, k);
% Loop until convergence.
for iter = 1 : 1000
    % fprintf(' EM Iteration %d\n', iter);

    %%===============================================
    %% STEP 3a: Expectation
    %
    % Calculate the probability for each data point for each distribution.
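    % E-step: the responsibility of cluster j for data point x_i is
    %   W(i, j) = phi(j) * N(x_i | mu_j, Sigma_j) / sum_l phi(l) * N(x_i | mu_l, Sigma_l)
    % computed below as 'pdf_w' followed by a row-wise normalization.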
    % Matrix to hold the pdf value of every data point for every cluster.
    % One row per data point, one column per cluster.
    pdf = zeros(m, k);

    % For each cluster...
    for j = 1 : k
        % Evaluate the Gaussian for all data points for cluster 'j'.
        pdf(:, j) = gaussianND(X, mu(j, :), sigma{j});
    end

    % Multiply each pdf value by the prior probability for its cluster.
    %   pdf   [m x k]
    %   phi   [1 x k]
    %   pdf_w [m x k]
    pdf_w = bsxfun(@times, pdf, phi);

    % Normalize: divide each weighted probability by the sum of the weighted
    % probabilities over all clusters for that data point.
    %   sum(pdf_w, 2) -- sum over the clusters.
    W = bsxfun(@rdivide, pdf_w, sum(pdf_w, 2));

    %%===============================================
    %% STEP 3b: Maximization
    %%
    %% Update the prior, mean, and covariance of each cluster using the
    %% responsibilities computed in the E-step.
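    % M-step updates (standard maximum-likelihood estimates for a GMM):
    %   phi(j)   = (1/m) * sum_i W(i, j)
    %   mu(j, :) = sum_i W(i, j) * x_i / sum_i W(i, j)
    %   Sigma{j} = sum_i W(i, j) * (x_i - mu_j)' * (x_i - mu_j) / sum_i W(i, j)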
    % Store the previous means so we can check for convergence.
    prevMu = mu;

    % For each of the clusters...
    for j = 1 : k
        % Calculate the prior probability for cluster 'j'.
        phi(j) = mean(W(:, j));

        % Calculate the new mean for cluster 'j' by taking the weighted
        % average of all data points.
        mu(j, :) = weightedAverage(W(:, j), X);

        % Calculate the covariance matrix for cluster 'j' by taking the
        % weighted average of the outer product for each training example.
        sigma_k = zeros(n, n);

        % Subtract the cluster mean from all data points.
        Xm = bsxfun(@minus, X, mu(j, :));

        % Calculate the contribution of each training example to the covariance matrix.
        for i = 1 : m
            sigma_k = sigma_k + (W(i, j) .* (Xm(i, :)' * Xm(i, :)));
        end

        % Divide by the sum of weights.
        sigma{j} = sigma_k ./ sum(W(:, j));
    end

    % Check for convergence: stop once the means no longer change between iterations.
    if (mu == prevMu)
        break;
    end

% End of Expectation Maximization
end
%%=====================================================
%% STEP 4: Plot the data points and their estimated pdfs.
% Display a scatter plot of the two distributions.
figure(2);
hold off;
plot(X1(:, 1), X1(:, 2), 'bo');
hold on;
plot(X2(:, 1), X2(:, 2), 'ro');
set(gcf,'color','white') % White background for the figure.
plot(mu1(1), mu1(2), 'kx');
plot(mu2(1), mu2(2), 'kx');
% First, create a [10,000 x 2] matrix 'gridX' of coordinates representing
% the input values over the grid.
gridSize = 100;
u = linspace(-6, 6, gridSize);
[A, B] = meshgrid(u, u);
gridX = [A(:), B(:)];
% Calculate the Gaussian response for every value in the grid.
z1 = gaussianND(gridX, mu(1, :), sigma{1});
z2 = gaussianND(gridX, mu(2, :), sigma{2});
% Reshape the responses back into a 2D grid to be plotted with contour.
Z1 = reshape(z1, gridSize, gridSize);
Z2 = reshape(z2, gridSize, gridSize);
% Plot the contour lines to show the pdf over the data.
[C, h] = contour(u, u, Z1);
[C, h] = contour(u, u, Z2);
axis([-6 6 -6 6])
title('Original Data and Estimated PDFs');
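%%=====================================================
%% Helper functions
% The script calls gaussianND and weightedAverage, which ship with the
% package as separate .m files picked up by the addpath call at the top.
% If those files are missing, the local sketches below show the behaviour
% this script assumes (a multivariate normal pdf and a weighted row
% average); remove them if the original helpers are on the path.
% (Local functions in a script require MATLAB R2016b or newer.)

function pdf = gaussianND(X, mu, Sigma)
% Evaluate the multivariate Gaussian pdf at each row of X.
%   X     - [m x n] data points, one row per point.
%   mu    - [1 x n] mean vector.
%   Sigma - [n x n] covariance matrix.
n = size(X, 2);
Xm = bsxfun(@minus, X, mu);                        % Center the data.
pdf = 1 / sqrt((2 * pi)^n * det(Sigma)) ...
      * exp(-0.5 * sum((Xm / Sigma) .* Xm, 2));    % Row-wise Mahalanobis term.
end

function val = weightedAverage(weights, values)
% Weighted average of the rows of 'values', one weight per row.
%   weights - [m x 1] weights.
%   values  - [m x n] data points, one row per point.
val = (weights' * values) ./ sum(weights);
end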