function [X, sampleLabels, groupBases, basisDimensions] = generate_samples(varargin)
% [X, sampleLabels, groupBases, basisDimensions] = generate_samples(varargin)
%
% The gpca data generation function to end all test data generation
% functions.
%
% Inputs: (Everything is Optional)
%
% ambientSpaceDimension: (Default is 3)
%
% basisDimensions: A vector the length of the number of groups, containing
% the dimensions of the groups
%
% basisDimensionType: If basisDimensions is not specified, this is one of
% 'hyperplanes' 'lines' 'oneOfEachDimension'
%
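% groupDistributionType: One of 'uniformCube', 'uniformSphere', 'normal',
% 'uniformSphereSurface'. (Note: the input parser below currently accepts
% only the first three and raises an error for 'uniformSphereSurface'.)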
%
% groupDistributionStandardDeviations: The standard
% deviation of the distance of the
% points to the origin. Defaults
% to .5 for every group so
% uniformly distributed data
% "fills" the unit sphere.
% Allows you to have some
% groups extend farther in
% the ambient space than others.
%
% WARNING: STD IS NOT BEING COMPUTED PROPERLY FOR ALL DISTRIBUTIONS AT
% THE MOMENT.
%
%
% groupSizes: A vector
% specifying the number of
% points in each
% group.
%
% noiseType: One of 'multiplicative' or 'additive'
%
% noiseStatistic: One of 'uniform' or 'normal'
%
% noiseLevel: The standard deviation of the noise.
%
% scrambleOrder: One of true or false (i.e. 1 or 0, not the string)
%
% minimumSubspaceAngle: If specified, the generator will try to
% enforce a worst-case (smallest) angle between
% any two subspaces. Must be between 0 and pi/2 radians.
%
% Note: The first group is left aligned with the low dimension axes. This
% means that if you are displaying a plane and two lines, putting the plane
% first should plot nicely.
ALIGN_FIRST_GROUP = true; % Decide whether or not to align the first group with the axes.
%This can be easier to display, but the presence of zeros in the array can
%make some of the computations later on more difficult (i.e. can't take log)
BASE_SAMPLE_COUNT = 25; % The base number of points to have for a one dimensional group.
% Parse the optional inputs.
if mod(length(varargin), 2) ~= 0,
error(['Extra Parameters passed to the function ''' mfilename ''' must be passed in pairs.']);
end
parameterCount = length(varargin)/2;
for parameterIndex = 1:parameterCount,
parameterName = varargin{parameterIndex*2 - 1};
parameterValue = varargin{parameterIndex*2};
switch lower(parameterName)
case 'ambientspacedimension'
ambientSpaceDimension = parameterValue;
if ambientSpaceDimension < 2 || ~isnumeric(ambientSpaceDimension),
error('The dimension of the ambient space should be an integer larger than one.');
end
case 'basisdimensions'
basisDimensions = parameterValue;
case 'basisDimensionType'
basisDimensionType = parameterValue;
if strcmpi(basisDimensionType, 'hyperplanes')
basisDimensionType = 'hyperplanes';
elseif strcmpi(basisDimensionType, 'lines')
basisDimensionType = 'lines';
elseif strcmpi(basisDimensionType, 'oneOfEachDimension')
basisDimensionType = 'oneOfEachDimension';
else
error('basisDimensionType must be one of ''hyperplanes'' ''lines'' ''oneOfEachDimension''.')
end
case 'groupdistributiontype'
groupDistributionType = parameterValue;
if strcmpi(groupDistributionType, 'uniformcube')
groupDistributionType = 'uniformCube';
elseif strcmpi(groupDistributionType, 'uniformsphere'),
groupDistributionType = 'uniformSphere';
elseif strcmpi(groupDistributionType, 'normal')
groupDistributionType = 'normal';
else
error('groupDistributionType must be one of ''uniformcube'', ''uniformSphere'', ''normal''.')
end
case 'groupdistributionstandarddeviations'
groupDistributionStandardDeviations = parameterValue;
case 'groupsizes'
groupSizes = parameterValue;
case 'noisetype'
noiseType = parameterValue;
if strcmpi(noiseType, 'multiplicative')
noiseType = 'multiplicative';
elseif strcmpi(noiseType, 'additive')
noiseType = 'additive';
else
error('noiseType must be one of ''multiplicative'' or ''additive''.')
end
case 'noisestatistic'
noiseStatistic = parameterValue;
if strcmpi(noiseStatistic, 'uniform'),
noiseStatistic = 'uniform';
elseif strcmpi(noiseStatistic, 'normal')
noiseStatistic = 'normal';
else
error('noiseStatistic must be one of ''uniform'' or ''normal''.')
end
case 'noiselevel'
noiseLevel = parameterValue;
if noiseLevel < 0 || ~isnumeric(noiseLevel),
error('noiseLevel should be a positive or zero numeric value.')
end
case 'scrambleorder'
scrambleOrder = parameterValue;
if ischar(scrambleOrder),
switch lower(scrambleOrder)
case 'true'
scrambleOrder = true;
case 'false'
scrambleOrder = false;
otherwise
error('Value for scrambleOrder should be a logical true or false')
end
end
case 'minimumsubspaceangle'
minimumSubspaceAngle = parameterValue;
if minimumSubspaceAngle < 0 || minimumSubspaceAngle > pi/2,
error('Value for minimumSubspaceAngle must be between 0 and pi/2 radians.')
end
case 'basesamplecount'
BASE_SAMPLE_COUNT = parameterValue;
otherwise
error(['Sorry, the parameter ''' parameterName ''' is not recognized by the function ''' mfilename '''.']);
end
end
% Set Default Values
if ~exist('ambientSpaceDimension','var'),
ambientSpaceDimension = 3; % By Default, three dimensional ambient space. (i.e. the highest we can visualize.)
end
if exist('basisDimensionType','var') && exist('basisDimensions','var'),
warning('It is not necessary to specify both basisDimensionType and basisDimensions');
%elseif ~exist('basisDimensionType','var') && exist('basisDimensions','var'),
elseif exist('basisDimensionType','var') && ~exist('basisDimensions','var'),
switch basisDimensionType
case 'hyperplanes'
basisDimensions = (ambientSpaceDimension - 1)*ones(1,ambientSpaceDimension); % One hyperplane for each dimension
case 'lines'
basisDimensions = ones(1,ambientSpaceDimension); % One line for each dimension
case 'oneOfEachDimension'
basisDimensions = 1:(ambientSpaceDimension - 1); % One of each dimension up to the hyperplane case
end
elseif ~exist('groupDimenisonType','var') && ~exist('basisDimensions','var'),
basisDimensions = [2 1 1]; % One Plane and two lines, our favorite three dimensional test case.
end
groupCount = length(basisDimensions);
% Uniform discs are easier to look at than normal distributions, and easier
% to segment than uniform cubes.
if ~exist('groupDistributionType','var'),
groupDistributionType = 'uniformSphere';
end
% By default, data should vary in magnitude mostly between -1 and 1.
if ~exist('groupDistributionStandardDeviations','va