% Experiment setup for a random-subspace ensemble of linear SVMs evaluated
% with k-fold cross validation: constants, result file, data loading and
% the counters used by the cross-validation loop further down.
clear all;
clc;
authors = 40 % number of authors (no semicolon: value is echoed to the console)
txt = 30; % number of texts per author
traintxt = 0.9*txt; % number of each author's training set
testtxt = txt-traintxt; % number of each author's texts held out per fold
classifier_num=100; % number of base classifiers in the ensemble
validation_folds = 10 % cross validation's folds (echoed to the console)
folds=validation_folds;

% Open the result file and fail right here if that is impossible; otherwise
% the first fprintf much later would fail with a far less helpful message.
result_file = ['E:\2011_research\ENexperiment\result\',num2str(authors),'_',num2str(txt),'_emsembles_pca+svm.txt'];
fid3 = fopen(result_file, 'w+');
if fid3 == -1
    error('Cannot open result file for writing: %s', result_file);
end

% Alternative data sets used in earlier runs:
% [label,allsamples]=libsvmread(['E:\2011_research\ENexperiment\ngrams\ngrams_',num2str(authors),'_',num2str(txt),'_','normalized_ig.libsvm']);
% [label,allsamples]=libsvmread('E:\2011_research\ENexperiment\combine_20_30\all_20_10_normalized_ig.libsvm');

% Read labels and the original feature matrix. No preallocation of
% allsamples is needed: libsvmread returns a fresh matrix, so a zeros(...)
% placeholder would only waste memory before being overwritten.
% Author's note on this data set: 30_30->888:85.67%
[label,allsamples]=libsvmread(['E:\2011_research\ENexperiment\combine_all\combine_',num2str(authors),'_',num2str(txt),'\all_normalized_ig_',num2str(authors),'_',num2str(txt),'.libsvm']);
initial_dimension = size(allsamples,2); % feature count before reduction (reported at the end)

tic
% The MAT-file is expected to define `reduced_sample`, the matrix the rest
% of the script works on (presumably the PCA-reduced samples, judging by
% the result file name -- TODO confirm).
load reduced_sample_10;
if ~exist('reduced_sample','var')
    error('reduced_sample_10.mat does not define the variable reduced_sample.');
end
now_dimension=size(reduced_sample,2) % reduced feature count (echoed)
% save reduced_sample_20 reduced_sample;

realauthor=0; % correctly identified test samples in the current fold
count=0; % correctly identified test samples over all folds
accuracys=[]; % per-fold accuracy in percent
subspace_size=now_dimension/classifier_num; % not referenced in the visible part of the script
% Build one feature subspace per base classifier. Row j of `subspace`
% holds the column indices (into the reduced feature matrix) used by
% classifier j: the first 20% of the features are always included, and a
% random 40% is drawn from the remaining 80%.
%
% All counts are rounded to integers once, up front. Using products such
% as 0.3*now_dimension directly as indices fails whenever now_dimension is
% not a multiple of 10 (or the product is not exactly integral in floating
% point). For multiples of 10 the result is identical to the unrounded form.
n_fixed = round(0.2*now_dimension); % leading features shared by every subspace
n_pool = now_dimension-n_fixed; % features available for random selection
pick_first = round(0.3*now_dimension)+1; % slice of the permutation that is kept
pick_last = round(0.7*now_dimension);
n_random = pick_last-pick_first+1; % randomly chosen features per subspace

subspace = zeros(classifier_num, n_fixed+n_random); % preallocate instead of growing per row
for j=1:classifier_num
    % Random permutation of the non-fixed feature indices n_fixed+1 .. now_dimension.
    space = n_fixed+randperm(n_pool);
    % space=randperm(now_dimension);
    subspace(j,1:n_fixed) = 1:n_fixed;
    subspace(j,n_fixed+1:n_fixed+n_random) = space(pick_first:pick_last);
    % subspace(j,:)=space(0.2*now_dimension+1:0.8*now_dimension);
end
% for j=1:classifier_num
% [Sw, Sb, Sm]=scatter_mat(reduced_sample(:,subspace(j,:)),label,authors);
% J3(j)=J3_comp(Sw, Sm);
% end
% [x,ind]=sort(J3);
% maxJ3idx=rot90(ind);
% n-folds cross validation of the subspace ensemble.
%
% Reads from the workspace: folds, classifier_num, authors, label,
% reduced_sample, subspace, fid3 and the counters realauthor / count /
% accuracys. Class labels are assumed to be 0 .. authors-1, because a
% predicted label p is tallied in vote slot p+1.
%
% Per fold: train one linear SVM per subspace, classify every test sample
% by majority vote, break ties by distance to the tied classes' training
% rows, write "true->predicted" to the result file, and record accuracy.
base_correctnum=zeros(folds,classifier_num); % correct predictions of each base classifier per fold
% Partition with the configured number of folds; a literal 10 here would
% silently disagree with the loop below whenever `folds` is changed.
indices = crossvalind('Kfold',label,folds);
for fold=1:folds
    testidx = (indices == fold);
    trainidx = ~testidx;
    traingroup = label(trainidx,:);
    trainlength = length(traingroup);
    testgroup = label(testidx,:);
    testlength = length(testgroup);
    % train/predict are called with sparse instance matrices
    traindata = sparse(reduced_sample(trainidx,:));
    testdata = sparse(reduced_sample(testidx,:));

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % train models in different subspaces
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    for j=1:classifier_num
        svmmodel(j)=train(traingroup,traindata(:,subspace(j,:)),'-s 3 -c 1 -B 1');
    end

    for i=1:testlength
        finalresult=zeros(1,authors); % votes per class for test sample i
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % ensembles classifying process
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        for j=1:classifier_num
            [predict_label, accuracy] = predict(testgroup(i), testdata(i,subspace(j,:)), svmmodel(j));
            finalresult(predict_label+1)=finalresult(predict_label+1)+1;
            if testgroup(i)==predict_label
                base_correctnum(fold,j)=base_correctnum(fold,j)+1;
            end
        end
        finalresult % no semicolon: the vote vector is echoed for every test sample

        maxidx=find(finalresult==max(finalresult)); % class slots holding the top vote count
        if length(maxidx)>1
            % Same number of votes: pick the tied class whose training rows
            % are closest to the test sample.
            % `distance` is rebuilt for every sample so that entries left
            % over from an earlier, larger tie cannot influence this one.
            distance=zeros(1,length(maxidx));
            for idx=1:length(maxidx)
                % Select the class's rows by label rather than by assuming
                % the training rows are sorted in equal-sized class blocks.
                class_rows=traindata(traingroup==(maxidx(idx)-1),:);
                d=dist2(class_rows,testdata(i,:));
                % NOTE(review): if dist2 is the Netlab function it returns
                % one squared distance per training row, which cannot be
                % stored in a scalar slot. Averaging is a no-op when dist2
                % already returns a scalar -- confirm which dist2 is on
                % the path and whether mean is the intended reduction.
                distance(idx)=full(mean(d(:)));
            end
            % Index of the smallest distance. (find(min(distance)) only
            % tests whether the minimum is non-zero, so it yields 1 or [].)
            [x,nearest]=min(distance);
            y=maxidx(nearest);
        else
            [x,y]=max(finalresult);
        end

        idenidx=y-1; % vote slot back to class label
        if testgroup(i)==idenidx
            realauthor = realauthor+1;
        end
        fprintf(fid3, '%d%s%d\r\n', testgroup(i),'->',idenidx);
    end

    % Accuracy over the samples actually tested in this fold; equals
    % testtxt*authors when folds are perfectly balanced.
    accuracy = (realauthor/testlength)*100;
    accuracys(fold)=accuracy;
    testdata=[];
    traindata=[];
    count=count+realauthor;
    realauthor=0;
end
% Summarise the cross-validation run on the console (statements without a
% trailing semicolon are echoed) and append the summary to the result file.
maxAccuracy = max(accuracys) % best single-fold accuracy in percent
totaltest = folds*testtxt*authors % nominal number of test samples over all folds
count % correctly identified test samples over all folds
allaccuracy=(count/(totaltest))*100 % pooled accuracy in percent
meanaccuracy=mean(accuracys) % mean of the per-fold accuracies
base_totalcorrectnum=sum(base_correctnum); % per base classifier, summed over folds
base_totalcorrectnum/totaltest % fraction correct of each base classifier (echoed)

fprintf(fid3,'%s%d%s%s\r\n','============================',folds,'folds cross validation','============================');
fprintf(fid3,'%s%d%s%d\r\n','initial dimension:',initial_dimension,'->now dimension:',now_dimension);
fprintf(fid3,'%s\t%d\r\n','testset are totally tested in the experiment:', folds*testtxt*authors);
fprintf(fid3,'%s\t%d\r\n','realauthors are identified as true:', count);
fprintf(fid3,'%s\t%f%s\r\n','allaccuracy of identification:',allaccuracy,'%');
fprintf(fid3,'%s\t%f%s\r\n','meanaccuracy of identification:',meanaccuracy,'%');
% Release the result file: without this the handle stays open for the rest
% of the MATLAB session and buffered output may not reach the disk.
fclose(fid3);
toc
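% ---- Download-page metadata that was attached to this file ----
% Title: PCA-based SVM multi-classifier (ensemble) construction using random partitioning of the feature space
% Rating: 4 stars · better than 85% of resources; points required: 21; views: 40
% Uploaded: 2011-05-26 16:15:05
% Comments: 1
% Favorite · 2KB RAR · Report
% Uploader: 学友 (Xueyou)
% - Followers: 1
% - Resources: 14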