#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Length in bytes of one class code. Only declared here (the definition is not
// in this file); MpfSort reads its value from the mpf header.
extern short codelen;
// Count the samples of every class into the int array and return the largest count.
int classCount( short*, int, int*, int );
// Fill the int array with a random permutation of 0..num-1.
void randomOrder( int, int* );
// Build the sorted table of distinct class codes and the class index of every
// sample; returns the number of classes.
int classTable( char*, int, char*, short* );
// Insert one class code into the sorted code table at the given position.
void insertTable( char*, char*, int, int );
// Binary search for a class code in the sorted code table.
int posInTable( char*, char*, int );
// Read `len` bytes found at absolute position `offset` of `in` into `buf` and,
// when `out` is not null, append them to `out`.
// Returns 1 on success and 0 on any seek, read or write failure.
static int mpfCopyBytes( FILE* in, FILE* out, long offset, char* buf, size_t len )
{
    if( fseek(in, offset, SEEK_SET)!=0 )
        return 0;
    if( fread(buf, 1, len, in)!=len )
        return 0;
    if( out!=0 && fwrite(buf, 1, len, out)!=len )
        return 0;
    return 1;
}

// Load the class code of each of the `sampNum` samples into `labels`
// (codelen bytes per sample). Every byte that follows the first NUL of a code
// is cleared so that codes can later be compared with memcmp.
// Returns 1 on success and 0 when a code cannot be read.
static int mpfReadLabels( FILE* in, long dataStart, int samplen, int sampNum, char* labels )
{
    int n;
    for( n=0; n<sampNum; n++ )
    {
        char* label = labels+(size_t)n*codelen;
        if( !mpfCopyBytes(in, 0, dataStart+(long)n*samplen, label, codelen) )
            return 0;
        // Bounded search: a code that fills all codelen bytes has no terminator.
        char* end = (char*)memchr( label, 0, codelen );
        if( end!=0 )
            memset( end, 0, codelen-(end-label) );
    }
    return 1;
}

// Build, for every class, the list of the positions (in the whole set) of its
// samples, in file order. `csnum[ci]` must be the number of samples whose
// `truth` value is ci. The caller owns the returned arrays.
static int** mpfGroupByClass( const short* truth, int sampNum, const int* csnum, int classNum )
{
    int** sampOrder = new int* [classNum];
    int* filled = new int [classNum];
    int ci, n;
    for( ci=0; ci<classNum; ci++ )
    {
        sampOrder[ci] = new int [ csnum[ci] ];
        filled[ci] = 0;
    }
    for( n=0; n<sampNum; n++ )
    {
        ci = truth[n];
        sampOrder[ci][ filled[ci]++ ] = n;
    }
    delete [] filled;
    return sampOrder;
}

// Make the vectors of each class distribute uniformly.
//
// Reads "<argv[1]>.mpf" and writes a reordered copy of it:
//   argv[2] == "class"  -> "<argv[1]>-c.mpf", samples grouped class by class;
//   argv[2] == "random" -> "<argv[1]>-r.mpf", classes interleaved uniformly,
//                          the samples of each class taken in random order;
//   anything else       -> "<argv[1]>-s.mpf", classes interleaved uniformly,
//                          the samples of each class kept in file order.
//
// File layout as used by this function: the first 4 bytes hold the header
// size; the last 30 bytes of the header hold the code length (2 bytes), the
// data type (20 bytes), the sample count (4 bytes) and the feature dimension
// (4 bytes). Each sample record is codelen bytes of class code followed by
// ftrDim feature values whose size is derived from the file length.
//
// Prints the usage and exits with status 0 when argc<2; exits with status 1
// when a file cannot be opened, the header is inconsistent, or copying fails.
void MpfSort( int argc, char** argv )
{
    if( argc<2 )
    {
        printf( "Make the vectors of each class distribute uniformly\n" );
        printf( "argv[1]: title of mpf file.\n" );
        printf( "argv[2]: (optional) \"class\", \"sample\" (default), or \"random\".\n" );
        exit( 0 );
    }

    char inName[100];
    char outName[100];
    // The longest name built below is "<title>-s.mpf": 6 extra characters plus the terminator.
    if( strlen(argv[1])+7>sizeof(outName) )
    {
        printf( "The file title is too long: %s\n", argv[1] );
        exit( 1 );
    }

    sprintf( inName, "%s.mpf", argv[1] );
    FILE* fpIn = fopen( inName, "rb" );
    if( fpIn==0 )
    {
        printf( "Cannot open the file %s\n", inName );
        exit( 1 );
    }

    int randOrder = 1; // 0: class order, 1: sample order (default), 2: random order
    if( argc>2 )
    {
        if( !strcmp(argv[2], "class") )
        {
            printf( "Sort the samples in class order.\n" );
            randOrder = 0;
        }
        if( !strcmp(argv[2], "random") )
        {
            printf( "Random ordering the samples of each class.\n" );
            randOrder = 2;
        }
    }
    if( randOrder==1 )
        sprintf( outName, "%s-s.mpf", argv[1] );
    else if( randOrder==2 )
        sprintf( outName, "%s-r.mpf", argv[1] );
    else
        sprintf( outName, "%s-c.mpf", argv[1] );

    // Read the header fields.
    int mpfhsize = 0;
    char datatype[20];
    int sampNum = 0, ftrDim = 0;
    int headerOk = fread( &mpfhsize, 4, 1, fpIn )==1 && mpfhsize>=30
        && fseek( fpIn, mpfhsize-30, SEEK_SET )==0
        && fread( &codelen, 2, 1, fpIn )==1
        && fread( datatype, 20, 1, fpIn )==1
        && fread( &sampNum, 4, 1, fpIn )==1
        && fread( &ftrDim, 4, 1, fpIn )==1;
    if( !headerOk )
    {
        printf( "Cannot read the mpf header of %s\n", inName );
        fclose( fpIn );
        exit( 1 );
    }
    printf( "%d samples, %d feature\n", sampNum, ftrDim );

    // Derive the size of one feature value from the file length.
    fseek( fpIn, 0, SEEK_END );
    const long fpLen = ftell( fpIn );
    long long elemBytes = 0;
    long long sampleBytes = 0;
    if( codelen>=0 && sampNum>0 && ftrDim>0 )
    {
        elemBytes = ( (long long)fpLen-mpfhsize-(long long)sampNum*codelen )
            / ( (long long)sampNum*ftrDim );
        sampleBytes = codelen+(long long)ftrDim*elemBytes;
    }
    // elemBytes<1 also covers a failed ftell and a file shorter than its header claims.
    if( elemBytes<1 || sampleBytes>2147483647LL )
    {
        printf( "Inconsistent mpf header in %s\n", inName );
        fclose( fpIn );
        exit( 1 );
    }
    const int samplen = (int)sampleBytes;

    FILE* fpOut = fopen( outName, "wb" );
    if( fpOut==0 )
    {
        printf( "Cannot open the file %s\n", outName );
        fclose( fpIn );
        exit( 1 );
    }

    // One transfer buffer serves both the header and every sample record.
    char* buff = new char [ mpfhsize>samplen ? mpfhsize : samplen ];
    char* labels = new char [ (size_t)sampNum*codelen ];
    // There cannot be more classes than samples, so this table can never overflow.
    char* codetable = new char [ (size_t)sampNum*codelen ];
    short* truth = new short [sampNum]; // class index of every sample
    int* csnum = 0;                     // class-specific sample numbers
    int** sampOrder = 0;                // per class: positions in the whole set
    int classNum = 0;
    int ci, tn, n;

    // Copy the mpf header to output file
    int ok = mpfCopyBytes( fpIn, fpOut, 0, buff, mpfhsize );
    if( !ok )
        printf( "Cannot copy the mpf header to %s\n", outName );
    if( ok && !mpfReadLabels(fpIn, mpfhsize, samplen, sampNum, labels) )
    {
        printf( "Cannot read the class codes from %s\n", inName );
        ok = 0;
    }

    if( ok )
    {
        classNum = classTable( labels, sampNum, codetable, truth );
        printf( "%d classes\n", classNum );
        // Class indices are stored as short; larger indices would be truncated.
        if( classNum>32768 )
        {
            printf( "Too many classes in %s\n", inName );
            ok = 0;
        }
    }

    if( ok )
    {
        csnum = new int [classNum];
        const int maxNum = classCount( truth, sampNum, csnum, classNum );
        sampOrder = mpfGroupByClass( truth, sampNum, csnum, classNum );

        if( randOrder==2 )
        {
            // Shuffle the samples of each class, one class after the other.
            for( ci=0; ci<classNum; ci++ )
            {
                int* torder = new int [ csnum[ci] ];
                randomOrder( csnum[ci], torder );
                for( tn=0; tn<csnum[ci]; tn++ )
                    torder[tn] = sampOrder[ci][ torder[tn] ];
                memcpy( sampOrder[ci], torder, csnum[ci]*sizeof(int) );
                delete [] torder;
            }
        }

        printf( "Output file: %s\n", outName );

        if( randOrder==0 ) // class order
        {
            for( ci=0; ok && ci<classNum; ci++ )
            {
                for( tn=0; ok && tn<csnum[ci]; tn++ )
                    ok = mpfCopyBytes( fpIn, fpOut,
                        mpfhsize+(long)sampOrder[ci][tn]*samplen, buff, samplen );
            }
        }
        else
        {
            // After step n (of maxNum) class ci must have emitted
            // n*csnum[ci]/maxNum samples. As csnum[ci]<=maxNum this target grows
            // by at most one per step, so every sample is written exactly once.
            int* csi = new int [classNum]; // samples already emitted per class
            memset( csi, 0, classNum*sizeof(int) );
            for( n=1; ok && n<=maxNum; n++ )
            {
                printf( "%d of %d\r", n, maxNum );
                for( ci=0; ok && ci<classNum; ci++ )
                {
                    // 64-bit product: n*csnum[ci] may exceed the range of int.
                    const int index = (int)( (long long)n*csnum[ci]/maxNum );
                    if( index>csi[ci] )
                    {
                        ok = mpfCopyBytes( fpIn, fpOut,
                            mpfhsize+(long)sampOrder[ci][ csi[ci] ]*samplen, buff, samplen );
                        csi[ci] = index;
                    }
                }
            }
            printf( "\n" );
            delete [] csi;
        }
        if( !ok )
            printf( "Error while copying samples from %s to %s\n", inName, outName );
    }

    if( sampOrder!=0 )
    {
        for( ci=0; ci<classNum; ci++ )
            delete [] sampOrder[ci];
        delete [] sampOrder;
    }
    delete [] csnum;
    delete [] codetable;
    delete [] truth;
    delete [] labels;
    delete [] buff;
    fclose( fpIn );
    if( fclose(fpOut)!=0 )
    {
        printf( "Error while closing %s\n", outName );
        ok = 0;
    }
    if( !ok )
        exit( 1 );
}
// Fill index[0..num-1] with a random permutation of 0..num-1.
//
// num:   number of entries; nothing is written when num<=0.
// index: output array with room for num ints.
//
// Randomness comes from rand(); the caller controls the seed. At step i one
// of the num-i still unused values is drawn, so the cost is O(num^2).
void randomOrder( int num, int* index )
{
    if( num<=0 )
        return;

    char* mark = new char [num]; // mark[v] != 0 once value v has been used
    memset( mark, 0, num );

    int tn;
    int i, j;
    for( i=0; i<num; i++ )
    {
        // Draw a rank among the unused values, then turn it into the value
        // itself by skipping every used value at or below it.
        tn = rand()%(num-i);
        for( j=0; j<=tn; j++ )
        {
            if( mark[j] )
                tn ++;
        }
        index[i] = tn;
        mark[tn] = 1;
    }

    delete [] mark; // array form: mark was allocated with new[]
}
// Count the number of samples of each class.
//
// truth:    class index of every sample (sampNum entries).
// csnum:    output, csnum[c] receives the number of samples of class c.
// classNum: number of classes, i.e. the length of csnum.
//
// Returns the largest per-class count (0 when there is nothing to count).
// When there are at most 100 classes the counts are also printed on one line.
int classCount( short* truth, int sampNum, int* csnum, int classNum )
{
    const int verbose = ( classNum<=100 );
    int largest = 0;
    int i;

    memset( csnum, 0, classNum*sizeof(int) );
    for( i=0; i<sampNum; i++ )
        csnum[ truth[i] ] += 1;

    for( i=0; i<classNum; i++ )
    {
        if( verbose )
            printf( "%d, ", csnum[i] );
        if( largest<csnum[i] )
            largest = csnum[i];
    }
    if( verbose )
        printf( "\n" );

    return largest;
}
// Sort the class codes of ALL samples and assign index numbers
int classTable( char* labels, int sampNum, char* table, short* truth )
{
int cnum = 0;
int pos; // position in code table
int n;
for( n=0; n<sampNum; n++ )
{
pos = posInTable( labels+n*codelen, table, cnum );
if( pos==cnum )
{
memcpy( table+cnum*codelen, labels+n*codelen, codelen );
cnum ++;
}
else if( memcmp(labels+n*codelen, table+pos*codelen, codelen) )
{
insertTable( labels+n*codelen, table, cnum, pos );
cnum ++;
}
}
for( n=0; n<sampNum; n++ )
truth[n] = posInTable( labels+n*codelen, table, cnum );
return cnum;
}
// Insert `label` (codelen bytes) at position `pos` of a table that currently
// holds `cnum` codes; the codes from `pos` onwards move up by one entry.
// The table must have room for cnum+1 codes.
void insertTable( char* label, char* table, int cnum, int pos )
{
    char* slot = table+pos*codelen;
    const int tailBytes = (cnum-pos)*codelen;

    memmove( slot+codelen, slot, tailBytes ); // regions overlap, so memmove
    memcpy( slot, label, codelen );
}
// The codes are sorted in ascending order in the table.
//
// Returns the position of the first table entry that is not smaller than
// `label` (compared with memcmp over codelen bytes), or cnum when `label` is
// larger than every entry. For a code that is present this is its index; for
// a new code it is the position at which it has to be inserted.
int posInTable( char* label, char* table, int cnum )
{
    // Empty table, or label not above the first entry.
    if( cnum==0 || memcmp(label, table, codelen)<=0 )
        return 0;
    // Label above the last entry.
    if( memcmp(label, table+(cnum-1)*codelen, codelen)>0 )
        return cnum;

    // Bisection keeping: entry[below] < label <= entry[notBelow].
    int below = 0;
    int notBelow = cnum-1;
    while( notBelow-below>1 )
    {
        const int mid = (below+notBelow)/2;
        if( memcmp(label, table+mid*codelen, codelen)>0 )
            below = mid;
        else
            notBelow = mid;
    }
    return notBelow;
}
没有合适的资源?快使用搜索试试~ 我知道了~
MPF_Util.rar_mpf
共29个文件
sbr:6个
cpp:6个
obj:6个
1.该资源内容由用户上传,如若侵权请联系客服进行举报
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
版权申诉
0 下载量 50 浏览量
2022-09-24
01:29:50
上传
评论
收藏 190KB RAR 举报
温馨提示
本程序的功能是对MPF文件的处理,包括分割、合并、删除等,为训练分类器做准备
资源详情
资源评论
资源推荐
收起资源包目录
MPF_Util.rar (29个子文件)
MPF_Util
Utility.cpp 622B
MpfMerge.cpp 3KB
MpfSort.cpp 7KB
Utility.dsw 537B
Utility.ncb 65KB
Utility.opt 84KB
Utility.suo 4KB
MpfSplit.cpp 2KB
MpfClass.cpp 3KB
MpfNorm.cpp 5KB
Utility.dsp 4KB
Utility.vcproj 7KB
Utility.vcproj.SU.thsu.user 1KB
Utility.plg 2KB
Debug
MpfMerge.sbr 10KB
MpfClass.obj 8KB
MpfMerge.obj 9KB
MpfClass.sbr 10KB
Utility.obj 2KB
MpfSplit.obj 6KB
MpfNorm.obj 14KB
MpfNorm.sbr 15KB
Utility.sbr 7KB
Utility.exe 200KB
MpfSplit.sbr 10KB
MpfSort.obj 16KB
vc60.pdb 52KB
MpfSort.sbr 12KB
Utility.pdb 401KB
共 29 条
- 1
小贝德罗
- 粉丝: 69
- 资源: 1万+
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功
评论0