// EM for Mixtures of Factor Analyzers
// Copyright (C) 2005 Kurt Tadayuki Miller and Mark Andrew Paskin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_blas.h>
#include <cmath>
#include <iostream>
#include <boost/random.hpp>
#include <boost/random/uniform_01.hpp>
#include <boost/random/normal_distribution.hpp>
#include <boost/random/mersenne_twister.hpp>
#include "mfa.hpp"
/**
* (This should later on go in mfa.hpp)
* Todos:
* thoroughly test MFA
* figure out why MFA can have a mean outside of values contained
* in the data
*
* --------------------------------------
* PPCA
* --------------------------------------
*
* PPCA has been thoroughly tested and can be trained in 1 of 3 ways:
* EM, "solve," and "solve_fast."
*
* EM finds the solution in an iterative manner and is slow. However,
* it is the only one that supports missing data points, so if there
* are missing/hidden data points, this is the only method to solve
* the problem.
*
* "solve_fast" reads all data into memory and solves the closed form
* solution. If all the data fits in memory, this is the prefered
* way.
*
* If the data does not fit in memory, "solve" still does the closed
* form solution, but has to go through the data O(N) times and can
* therefore be significantly slower.
*
* -------------------------------------
* FA
* --------------------------------------
*
* FA has been decently tested. If running MFA with one component, it
* is recommended to first run ppca_solve or ppca_solve_fast to
* initialize with a good first estimate. At that point,
* convert_PPCA_to_FA can be used to switch to FA and then use em will
* solve the rest.
*
* -------------------------------------
* MFA
* --------------------------------------
*
* MFA has only been tested on small data sets. With a larger data
* set, it gave unexpected results, but the cause has not been
* determined.
*
* -------------------------------------
* Training
* --------------------------------------
*
* To train data with this class, use mfa_train and add a wrapper
* class that will read in the data and put it in GSL format. An
* example is included in mfa_train.
*
* -------------------------------------
* Testing
* --------------------------------------
*
* To use this as a classifier, use one of the likelihood functions.
* To see what the expected data would be, use get_expected_data.
* This is helpful in testing proper convergence/functionality.
*
*/
//==================== debug utils =========================
void matrix_printf(const gsl_matrix* m, const bool one_line) {
for (unsigned int r = 0; r < m->size1; ++r) {
for (unsigned int c = 0; c < m->size2; ++c)
printf("%.5lf ", gsl_matrix_get(m, r, c));
if (!one_line)
printf("\n");
}
if (one_line)
printf("\n");
}
void vector_printf(const gsl_vector *v) {
for (unsigned int i = 0; i < v->size; ++i)
printf("%.5lf ", gsl_vector_get(v, i));
printf("\n");
}
//======================== mfa_t::fa_info_t ==============================
mfa_t::fa_info_t::fa_info_t(std::size_t m, std::size_t n) {
//allocate memory
this->W = gsl_matrix_alloc(n, m);
this->mu = gsl_vector_alloc(n);
}
mfa_t::fa_info_t::~fa_info_t() {
//free memory
gsl_matrix_free(this->W);
gsl_vector_free(this->mu);
}
//======================== mfa_t::mfa_scratch_t ==============================
mfa_t::mfa_scratch_t::mfa_scratch_t(std::size_t m, std::size_t n,
bool ppca) {
//allocate memory
this->expected_yx = gsl_matrix_alloc(n, m);
this->expected_y = gsl_vector_alloc(n);
this->expected_yy = gsl_vector_alloc(n);
if (ppca)
this->sigma_em = gsl_vector_alloc(1);
else
this->sigma_em = gsl_vector_alloc(n);
this->m1_by_m1_scratch = gsl_matrix_alloc(m+1, m+1);
this->n_by_m1_scratch = gsl_matrix_alloc(n, m+1);
this->m1_permutation = gsl_permutation_alloc(m+1);
}
mfa_t::mfa_scratch_t::~mfa_scratch_t() {
//free memory
gsl_matrix_free(this->expected_yx);
gsl_vector_free(this->expected_y);
gsl_vector_free(this->expected_yy);
gsl_vector_free(this->sigma_em);
gsl_matrix_free(this->m1_by_m1_scratch);
gsl_matrix_free(this->n_by_m1_scratch);
gsl_permutation_free(this->m1_permutation);
}
//====================== mfa_t::fa_factor_scratch_t ==========================
mfa_t::fa_factor_scratch_t::fa_factor_scratch_t(std::size_t m,
std::size_t n) {
//allocate memory
this->W1 = gsl_matrix_alloc(n, m+1);
this->W2 = gsl_matrix_alloc(m+1, m+1);
this->W_em = gsl_matrix_alloc(n, m);
this->sigma1 = gsl_vector_alloc(n);
this->mu_em = gsl_vector_alloc(n);
this->M_j = gsl_matrix_alloc(m, m);
this->M_j_lu = gsl_matrix_alloc(m, m);
this->M_j_inv = gsl_matrix_alloc(m, m);
this->q_j = gsl_vector_alloc(m);
this->m_permutation = gsl_permutation_alloc(m);
}
mfa_t::fa_factor_scratch_t::~fa_factor_scratch_t() {
//free memory
gsl_matrix_free(this->W1);
gsl_matrix_free(this->W2);
gsl_matrix_free(this->W_em);
gsl_vector_free(this->sigma1);
gsl_vector_free(this->mu_em);
gsl_matrix_free(this->M_j);
gsl_matrix_free(this->M_j_inv);
gsl_matrix_free(this->M_j_lu);
gsl_vector_free(this->q_j);
gsl_permutation_free(this->m_permutation);
}
//======================== mfa_t public ==================================
mfa_t::mfa_t(std::string path) throw (std::runtime_error) :
mfa_scratch_ptr(NULL), initialized(false), mu_initialized(false) {
//load the data, which in turn initializes the model
load(path);
}
mfa_t::mfa_t(std::size_t k, std::size_t m, std::size_t n, bool ppca) :
k(k), m(m), n(n), ppca(ppca), mfa_scratch_ptr(NULL), initialized(false),
mu_initialized(false) {
//initialize the model
initialize(k, m, n, ppca);
}
mfa_t::~mfa_t() {
// free any allocated memory
if (initialized) {
gsl_matrix_free(this->expected_xx);
gsl_matrix_free(this->M);
gsl_matrix_free(this->M_lu);
gsl_matrix_free(this->M_inv);
gsl_matrix_free(this->m_by_m_scratch);
gsl_matrix_free(this->n_by_m_scratch);
gsl_vector_free(this->expected_x);
gsl_vector_free(this->q);
gsl_vector_free(this->m_by_1_scratch);
gsl_vector_free(this->n_by_1_scratch);
gsl_vector_free(this->current_data);
gsl_vector_free(this->sigma);
gsl_vector_free(this->sigma_inv);
gsl_permutation_free(this->m_permutation);
gsl_permutation_free(this->n_permutation);
gsl_permutation_free(this->n_permutation_inv);
gsl_vector_free(this->pi);
}
}
void mfa_t::reset_log_likelihood() {
previous_log_likelihood = -std::numeric_limits<double>::infinity();
}
double mfa_t::log_likelihood(const gsl_vector* data) {
//permute the data
gsl_blas_dcopy(data, current_data);
std::size_t num_hidden = compute_permutation(n_permutation, current_data);
gsl_permutation_inverse(n_permutation_inv, n_permutation);
gsl_permute_vector(n_permutation, current_data);
if (!ppca) {
gsl_permute_vector(n_permutation, sigma);
gsl_permute_vector(n_permutation, sigma_inv);
}
//add the log likelihood for each factor
prl::log_t<double> ll;
for (std::size_t j = 0; j < k; ++