#include<malloc.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Capacity of the keyword index (number of IdxTermType slots). */
#define MaxKeyNum 500
/* Buffer size for a single word, including the terminating NUL. */
#define MaxWordLen 20
/* Capacity of the ordinary (common) word list. */
#define MaxOrdiNum 150
/* Maximum number of words (fields) taken from one line of text. */
#define MaxTextNum 400
/* Buffer size for one paragraph (one line read from a text file). */
#define MaxTextLen 1000
/*
 * Buffer holding the line most recently read by GetLine().
 * It must be a global: GetLine() stores pointers into this buffer in the
 * field list, and the caller reads those pointers after GetLine() returns.
 */
char Text[MaxTextLen];
/* List of ordinary words; words found here are not treated as keywords. */
typedef struct
{
/* The stored words, one NUL-terminated string per row. */
char ordiword[MaxOrdiNum][MaxWordLen];
/* Index of the last used row, or -1 when the list is empty. */
int last;
}OrdiListType;
/* One occurrence of a word; nodes are chained into a singly linked list. */
typedef struct Info
{
/* loc[0] = file number, loc[1] = paragraph number, loc[2] = line number. */
int loc[3];
/* Next occurrence, or NULL at the end of the list. */
struct Info *next;
}Info;
/* One entry of the keyword index: a key and the list of its occurrences. */
typedef struct
{
/* The keyword text (NUL-terminated). */
char key[MaxTextLen];
/*
 * head: first occurrence node.
 * cur:  node that InsertNewInfo() appends after (the tail while the index
 *       is being built); Search() also uses it as its iteration cursor.
 */
Info *head,*cur;
}IdxTermType;
/* The keyword index: entries kept in strcmp() order so Locate() can binary-search. */
typedef struct
{
/* Index entries, valid in positions 0..last. */
IdxTermType Item[MaxKeyNum];
/* Index of the last used entry, or -1 when the index is empty. */
int last;
}IdxListType;
/* One word taken from the current line, together with its position record. */
typedef struct
{
/* Points at the word inside the global Text buffer (set by GetLine()). */
char *field;
/* Position record for this word, allocated by PutInfo(). */
Info *head;
}FieldTermType;
/* The words of the line currently being processed. */
typedef struct
{
/* Words of the current line, valid in positions 0..last. */
FieldTermType Item[MaxTextNum];
/* Index of the last word on the current line (value returned by GetLine()). */
int last;
}FieldListType;
/*
 * Names of the text files to index. Entry 0 is an empty placeholder so that
 * the file numbers used by the loops in this file start at 1.
 */
char *fname[]={"","introduction.txt","introduction-2.txt","step1.txt","step2.txt","step2-2.txt","step3.txt","step4.txt","step5.txt","step5-2.txt","step6.txt"};
/*
 * Puts the keyword index into its empty state.
 * `last` is the index of the final entry, so -1 means "no entries".
 */
void InitIdxList(IdxListType *s)
{
    const int empty_marker = -1;

    s->last = empty_marker;
}
/*
 * Builds the ordinary-word list from the file "Ordinary Vocabulary.txt".
 *
 * The file is read as whitespace-separated words, each limited to
 * MaxWordLen-1 characters. At most MaxOrdiNum words are stored; any further
 * words in the file are ignored so the table cannot overflow.
 *
 * If the file cannot be opened, a diagnostic is printed and the list is left
 * empty (s->last == -1), in which case WordIsKey() treats every word as a key.
 */
void CreatOrdiList(OrdiListType *s)
{
    char buf[MaxWordLen];
    char fmt[16];
    FILE *f;

    s->last = -1;

    f = fopen("Ordinary Vocabulary.txt", "r");
    if (f == NULL) {
        perror("Ordinary Vocabulary.txt");
        return;
    }

    /* Build "%<MaxWordLen-1>s" so fscanf can never overrun buf. */
    snprintf(fmt, sizeof fmt, "%%%ds", MaxWordLen - 1);

    /* Compare against 1 (items converted) rather than EOF, and stop when full. */
    while (s->last + 1 < MaxOrdiNum && fscanf(f, fmt, buf) == 1)
        strcpy(s->ordiword[++s->last], buf);

    fclose(f);
}
/*
 * Strips surrounding quote/parenthesis characters from a token, in place.
 *
 * If the token starts with '"' or '(', the returned pointer skips that first
 * character; in that case a trailing '"' or ')' is also overwritten with NUL.
 * A token that does not start with '"' or '(' is returned unchanged, even if
 * it ends with '"' or ')'.
 */
char *unquote(char *p)
{
    size_t tail;
    char first = p[0];

    if (first != '"' && first != '(')
        return p;

    /* The token holds at least its opening character, so strlen(p) >= 1. */
    tail = strlen(p) - 1;
    if (p[tail] == '"' || p[tail] == ')')
        p[tail] = '\0';

    return p + 1;
}
/*
 * Reads the next line that contains at least one word from f into the global
 * Text buffer and splits it into words.
 *
 * Words are separated by any of ",. \n?!:;". Each word is passed through
 * unquote() and a pointer to it (inside Text) is stored in s->Item[i].field.
 * The pointers stay valid only until the next call, which overwrites Text.
 *
 * Lines that yield no words (blank or delimiter-only) are skipped: returning
 * -1 for them would be indistinguishable from end of file and would make the
 * caller stop reading the file early. Such lines are therefore not seen (or
 * counted) by the caller.
 *
 * At most MaxTextNum words are stored per line; extra words are dropped so
 * that s->Item cannot overflow.
 *
 * Returns the index of the last word stored (>= 0), or -1 at end of file or
 * on a read error.
 */
int GetLine(FILE *f,FieldListType *s)
{
    static const char delims[] = ",. \n?!:;";
    char *tok;
    int nf;

    for (;;) {
        if (fgets(Text, sizeof(Text), f) == NULL)
            return -1;

        nf = -1;
        for (tok = strtok(Text, delims);
             tok != NULL && nf + 1 < MaxTextNum;
             tok = strtok(NULL, delims))
            s->Item[++nf].field = unquote(tok);

        if (nf >= 0)
            return nf;
    }
}
/*
 * Attaches a freshly allocated position record to word FLi of the field list.
 *
 * s     field list holding the words of the current line
 * FLi   index of the word to annotate
 * fi    file number, stored in loc[0]
 * para  paragraph number, stored in loc[1]
 * line  current line number; stored in loc[2]
 *
 * A word starting with '#' marks the start of a new line: the line number is
 * incremented first and the '#' is skipped by advancing the field pointer.
 * If the (remaining) word starts with an upper-case ASCII letter, that first
 * letter is converted to lower case in place.
 *
 * The previous value of s->Item[FLi].head is overwritten without being freed
 * (it may be uninitialised); the caller owns the new node and must free it.
 *
 * Returns the possibly incremented line number. Terminates the program if the
 * record cannot be allocated.
 */
int PutInfo(FieldListType *s,int FLi,int fi,int para,int line)
{
    FieldTermType *item = &s->Item[FLi];
    Info *info = malloc(sizeof *info);

    if (info == NULL) {
        fprintf(stderr, "PutInfo: out of memory\n");
        exit(EXIT_FAILURE);
    }

    if (item->field[0] == '#') {
        ++line;
        item->field++;
    }

    /* Lower-case only the first letter (same as adding 32 in ASCII). */
    if (item->field[0] >= 'A' && item->field[0] <= 'Z')
        item->field[0] = (char)(item->field[0] + ('a' - 'A'));

    info->loc[0] = fi;
    info->loc[1] = para;
    info->loc[2] = line;
    info->next = NULL;
    item->head = info;

    return line;
}
/*
 * Decides whether a word counts as a keyword.
 * Returns 0 when the word appears in the ordinary-word list s, 1 otherwise.
 */
int WordIsKey(char *word,OrdiListType *s)
{
    int idx = 0;

    while (idx <= s->last) {
        if (!strcmp(word, s->ordiword[idx]))
            return 0;   /* listed as ordinary, so not a keyword */
        idx++;
    }
    return 1;
}
/*
 * Binary-searches the index for word.
 *
 * On a match, writes "TRUE" into b and returns the position of the entry.
 * Otherwise writes "FALSE" into b and returns the position at which the word
 * would have to be inserted to keep the entries in strcmp() order.
 * b must have room for at least 6 bytes.
 */
int Locate(IdxListType *s,char *word,char *b)
{
    int lo = 0;
    int hi = s->last;

    for (;;) {
        int probe, order;

        if (lo > hi)
            break;

        probe = lo + (hi - lo) / 2;
        order = strcmp(word, s->Item[probe].key);

        if (order == 0) {
            strcpy(b, "TRUE");
            return probe;
        }
        if (order > 0)
            lo = probe + 1;
        else
            hi = probe - 1;
    }

    strcpy(b, "FALSE");
    return lo;
}
/*
 * Inserts a new keyword at position j of the index, shifting the entries at
 * j..last up by one, and records its first occurrence.
 *
 * s     the keyword index
 * j     insertion position, as returned by Locate() for a missing word
 * Data  the word (Data.field) and its position record (Data.head)
 *
 * The position values are copied into a newly allocated node, so Data.head
 * is not retained by the index.
 *
 * If the index already holds MaxKeyNum entries, or the node cannot be
 * allocated, a diagnostic is written to stderr and the index is left
 * unchanged (the word is not indexed).
 */
void InsertNewKey(IdxListType *s,int j,FieldTermType Data)
{
    Info *node;
    int i;

    /* Shifting would write to Item[last+1]; refuse when that is out of range. */
    if (s->last + 1 >= MaxKeyNum) {
        fprintf(stderr, "InsertNewKey: index is full (%d keys), \"%s\" not indexed\n",
                MaxKeyNum, Data.field);
        return;
    }

    /* Allocate before touching the table so a failure leaves it intact. */
    node = malloc(sizeof *node);
    if (node == NULL) {
        fprintf(stderr, "InsertNewKey: out of memory, \"%s\" not indexed\n", Data.field);
        return;
    }
    for (i = 0; i < 3; i++)
        node->loc[i] = Data.head->loc[i];
    node->next = NULL;

    /* Open a gap at position j. */
    for (i = s->last; i >= j; --i) {
        strcpy(s->Item[i + 1].key, s->Item[i].key);
        s->Item[i + 1].head = s->Item[i].head;
        s->Item[i + 1].cur = s->Item[i].cur;
    }

    /* Bounded copy: never writes past the key buffer. */
    snprintf(s->Item[j].key, sizeof s->Item[j].key, "%s", Data.field);
    s->Item[j].head = node;
    s->Item[j].cur = node;
    ++s->last;
}
/*
 * Appends another occurrence to the keyword already stored at position j.
 *
 * s     the keyword index
 * j     position of the existing keyword, as returned by Locate()
 * Data  supplies the position record to copy (Data.head->loc)
 *
 * The values are copied into a newly allocated node appended after the
 * entry's `cur` node, and `cur` is advanced to the new node. Because Search()
 * leaves `cur` at NULL after listing an entry, a NULL `cur` is recovered by
 * walking the list from `head` to its last node.
 *
 * If the node cannot be allocated, a diagnostic is written to stderr and the
 * entry is left unchanged.
 */
void InsertNewInfo(IdxListType *s,int j,FieldTermType Data)
{
    IdxTermType *term = &s->Item[j];
    Info *node = malloc(sizeof *node);
    Info *tail;
    int i;

    if (node == NULL) {
        fprintf(stderr, "InsertNewInfo: out of memory, occurrence not recorded\n");
        return;
    }
    for (i = 0; i < 3; i++)
        node->loc[i] = Data.head->loc[i];
    node->next = NULL;

    tail = term->cur;
    if (tail == NULL) {
        /* cur was consumed as an iterator; find the real tail again. */
        for (tail = term->head; tail != NULL && tail->next != NULL; tail = tail->next)
            ;
    }

    if (tail == NULL)
        term->head = node;
    else
        tail->next = node;
    term->cur = node;
}
/*
 * Adds one word occurrence to the index: a word not yet present becomes a
 * new key, a word already present gets another occurrence record.
 */
void InsIdxList(IdxListType *s,FieldTermType Data)
{
    char found[10];
    int pos = Locate(s, Data.field, found);

    if (strcmp(found, "FALSE") != 0) {
        /* Key exists: only record the additional position. */
        InsertNewInfo(s, pos, Data);
        return;
    }
    InsertNewKey(s, pos, Data);
}
/*
 * Writes every key of the index to "IdxList.txt".
 *
 * The file is opened in binary mode and each key is written together with
 * its terminating NUL byte, so keys are NUL-separated in the file. Only the
 * keys are written, not their occurrence lists.
 *
 * If the file cannot be opened, a diagnostic is printed and nothing is
 * written. Writing stops at the first failed write; a failure reported by
 * fclose() (e.g. on the final flush) is reported as well.
 */
void PutIdxList(IdxListType *s)
{
    FILE *f = fopen("IdxList.txt", "wb");
    int i;

    if (f == NULL) {
        perror("IdxList.txt");
        return;
    }

    for (i = 0; i <= s->last; i++) {
        if (fwrite(s->Item[i].key, strlen(s->Item[i].key) + 1, 1, f) != 1) {
            printf("file write error\n");
            break;
        }
    }

    /* Buffered data is flushed here, so a write error may only show up now. */
    if (fclose(f) != 0)
        printf("file write error\n");
}
/* Discards the remainder of the current stdin line (through '\n' or EOF). */
static void SkipInputLine(void)
{
    int c;

    do {
        c = getchar();
    } while (c != '\n' && c != EOF);
}

/*
 * Interactive query loop on stdin/stdout.
 *
 * While the user answers 'Y' to "Search the key word(Y/N):", reads one word
 * and looks it up in the index. For a word that is found, every recorded
 * occurrence is listed as file number, paragraph number and line number.
 * Afterwards the user is asked whether to show the text; on 'Y' the contents
 * of all files named in fname[1..] are printed.
 *
 * The search word is read with a width limit of MaxWordLen-1 characters so
 * the buffer cannot overflow; anything else typed on that line is discarded.
 * End of input ends the query loop. The occurrence list is walked with a
 * local pointer, so the index entries (including `cur`) are not modified.
 * A file that cannot be opened is reported and skipped.
 */
void Search(IdxListType *s)
{
    char sch[MaxWordLen];
    char text[MaxTextLen];
    char b[10];
    char fmt[16];
    int get;            /* int, so EOF from getchar() is representable */
    int loc;
    int i, j;
    int nfiles = (int)(sizeof fname / sizeof fname[0]);
    Info *hit;
    FILE *f;

    /* Build "%<MaxWordLen-1>s" to bound the scanf below. */
    snprintf(fmt, sizeof fmt, "%%%ds", MaxWordLen - 1);

    printf("Search the key word(Y/N):");
    get = getchar();
    while (get == 'Y') {
        printf("Search:");
        if (scanf(fmt, sch) != 1)
            break;      /* end of input */

        loc = Locate(s, sch, b);
        if (strcmp(b, "FALSE") == 0) {
            printf("The word is not found.\n");
        } else {
            printf("The word is found in:\n");
            printf(" filenum paranum linenum\n");
            for (hit = s->Item[loc].head, i = 1; hit != NULL; hit = hit->next, i++) {
                printf("%d.", i);
                for (j = 0; j < 3; j++)
                    printf("%7d ", hit->loc[j]);
                printf("\n");
            }
        }
        printf("\n");
        printf("Search the key word(Y/N):");
        SkipInputLine();    /* rest of the line the word was typed on */
        get = getchar();
    }

    printf("\n");
    printf("Show the text(Y/N):");
    /* Drop the rest of the previous answer's line unless it is already consumed. */
    if (get != '\n' && get != EOF)
        SkipInputLine();
    get = getchar();
    if (get == 'Y') {
        for (i = 1; i < nfiles; i++) {
            f = fopen(fname[i], "r");
            if (f == NULL) {
                perror(fname[i]);
                continue;
            }
            while (fgets(text, sizeof(text), f) != NULL) {
                printf("%s", text);
                printf("\n");
            }
            fclose(f);
        }
    }
}
void main()
{
OrdiListType *OrdiList;
IdxListType *IdxList;
FieldListType *FieldList;
FILE *f;
int fi;
OrdiList=(OrdiListType *)malloc(sizeof(OrdiListType));
IdxList=(IdxListType *)malloc(sizeof(IdxListType));
FieldList=(FieldListType *)malloc(sizeof(FieldListType));
CreatOrdiList(OrdiList);
InitIdxList(IdxList);
for(fi=1;fi<=10;fi++)
{
int para=1,line=1;
f=fopen(fname[fi],"r");
while((FieldList->last=GetLine(f,FieldList))!=-1)
{
int FLi;
for(FLi=0;FLi<=FieldList->last;FLi++)
{
line=PutInfo(FieldList,FLi,fi,para,line);
if(WordIsKey(FieldList->Item[FLi].field,OrdiList)==1)
InsIdxList(IdxList,FieldList->Item[FLi]);
}
para++;
}
fclose(f);
}
printf("%d\n",IdxList->last);
Search(IdxList);
PutIdxList(IdxList);
return;
}