package test;
import java.io.*;
import java.util.*;
/**
 * <p>
 * Title: Chinese word segmentation
 * </p>
 * <p>
 * Description: Segments Chinese text into words, leaving digits and English
 * text unchanged.
 * </p>
 * <p>
 * Copyright: IRLab.SDU (c) 2007
 * </p>
 * <p>
 * Company: IRLab.SDU
 * </p>
 *
 * @author CZM/ODN
 * @version 1.0
 */
public class Segmenter {
// Main word dictionary: maps each stored word (or word prefix) to one of the
// WORDFLAG_* markers below. Populated by loadWordLibs.
private static HashMap wordsLib;
// Character/word sets created by loadWordLibs: Chinese surnames, characters used
// in transliterated foreign names, Chinese numerals, characters that cannot be
// part of a name, and stop words (stop-word loading is currently commented out).
private static HashSet csurname, cforeign, cnumbers, cnotname, cstopwords;
public final static int TRAD = 0; // Traditional Chinese
public final static int SIMP = 1; // Simplified Chinese
public final static int BOTH = 2; // Both simplified and traditional Chinese
public final static int MAXMATCHINGLENTH = 7; // Maximum match length (longest word stored in the dictionary)
private static final String WORDFLAG_ORG = "1";// Marks a word that is an original dictionary entry
private static final String WORDFLAG_CUT = "2";// Marks a fragment cut from a longer word
// Load all dictionaries once, when the class is initialized, using the "-g" flag.
static {
loadWordLibs("-g");
}
/**
 * Adds a word, plus its shorter prefixes, to the given dictionary.
 *
 * <p>The word itself is stored with {@code flag}. Every prefix obtained by
 * repeatedly dropping the last character, down to a length of two characters,
 * is stored with {@code WORDFLAG_CUT}. Empty words, words containing
 * {@code '#'} and words longer than {@code MAXMATCHINGLENTH} are ignored.
 *
 * <p>An existing entry is never downgraded: a cut-prefix flag is only written
 * when the key is absent, while any other flag replaces whatever is stored.
 * This keeps the result independent of the order in which words are added.
 *
 * @param wordslib dictionary to update (word to flag)
 * @param word     word to add
 * @param flag     flag to store for {@code word} itself
 */
private static void addNewWord(HashMap wordslib, String word, String flag) {
    if (word.length() == 0 || word.indexOf("#") != -1
            || word.length() > MAXMATCHINGLENTH) {
        return;
    }
    String entry = word;
    String entryFlag = flag;
    while (true) {
        String key = entry.intern();
        // Check the map that was passed in (not the static field), and let a
        // genuine dictionary word override a prefix marker stored earlier.
        if (!wordslib.containsKey(key) || !WORDFLAG_CUT.equals(entryFlag)) {
            wordslib.put(key, entryFlag);
        }
        if (entry.length() <= 2) {
            break;
        }
        // Continue with the next shorter prefix, which is only a cut fragment.
        entry = entry.substring(0, entry.length() - 1);
        entryFlag = WORDFLAG_CUT;
    }
}
/**
 * Reads a dictionary file line by line and adds every line to the dictionary
 * as an original word (via {@code addNewWord} with {@code WORDFLAG_ORG}).
 *
 * <p>The file is looked up in the {@code data} directory below the current
 * working directory ({@code user.dir}) and decoded as UTF-8. I/O failures are
 * reported with a stack trace and otherwise ignored; the underlying stream is
 * always closed.
 *
 * @param wordslib dictionary to fill
 * @param filePath file name relative to the {@code data} directory
 */
private static void loadEntriesFromFile(HashMap wordslib, String filePath) {
    // Resolve the dictionary directory relative to the working directory.
    String dataPath = System.getProperty("user.dir")
            + System.getProperty("file.separator") + "data"
            + System.getProperty("file.separator");
    InputStream worddata = null; // dictionary file stream
    try {
        worddata = new FileInputStream(dataPath + filePath);
        BufferedReader in = new BufferedReader(new InputStreamReader(
                worddata, "UTF8"));
        String newword;
        while ((newword = in.readLine()) != null) {
            addNewWord(wordslib, newword, WORDFLAG_ORG);
        }
    } catch (IOException e) {
        // Also covers FileNotFoundException and UnsupportedEncodingException.
        e.printStackTrace();
    } finally {
        // Release the file handle even when reading failed part-way through.
        if (worddata != null) {
            try {
                worddata.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
/**
 * Loads all dictionaries. This only needs to happen once before segmenting.
 *
 * <p>The flag selects which character-set files are read: {@code "-b"} and
 * {@code "-t"} select the traditional files, {@code "-8"} selects both the
 * simplified and the traditional files, and any other value (including
 * {@code "-g"} and {@code "-s"}) selects the simplified files. All files are
 * read as UTF-8 from the {@code data} directory below the working directory.
 * The stop-word set is created but left empty, since stop-word loading is
 * disabled. Finally the main word dictionary is rebuilt and its size is
 * printed to standard output.
 *
 * @param encodingFlag encoding flag: "-b" Big5, "-g" GB2312, "-8" UTF-8,
 *                     "-s" SIMP, "-t" TRAD
 */
public static void loadWordLibs(String encodingFlag) {
    // Map the flag to a character form; simplified Chinese is the default.
    final int charform;
    if (encodingFlag.equals("-b") || encodingFlag.equals("-t")) {
        charform = Segmenter.TRAD;
    } else if (encodingFlag.equals("-8")) {
        charform = Segmenter.BOTH;
    } else {
        charform = Segmenter.SIMP;
    }

    csurname = new HashSet(); // Chinese surnames
    cforeign = new HashSet(); // characters used in foreign names
    cnumbers = new HashSet(); // Chinese numerals
    cnotname = new HashSet(); // characters that cannot be part of a name
    cstopwords = new HashSet(); // stop words (not loaded at present)

    // Directory holding the dictionary files, relative to the working directory.
    String separator = System.getProperty("file.separator");
    String path = System.getProperty("user.dir") + separator + "data"
            + separator;

    boolean wantSimplified = charform != TRAD;
    boolean wantTraditional = charform != SIMP;

    if (wantSimplified) {
        // Simplified character sets, stored as UTF-8.
        loadSet(cnumbers, path + "snumbers_u8.txt", "UTF-8");
        loadSet(cforeign, path + "sforeign_u8.txt", "UTF-8");
        loadSet(csurname, path + "ssurname_u8.txt", "UTF-8");
        loadSet(cnotname, path + "snotname_u8.txt", "UTF-8");
    }
    if (wantTraditional) {
        // Traditional character sets, stored as UTF-8.
        loadSet(cnumbers, path + "tnumbers_u8.txt", "UTF-8");
        loadSet(cforeign, path + "tforeign_u8.txt", "UTF-8");
        loadSet(csurname, path + "tsurname_u8.txt", "UTF-8");
        loadSet(cnotname, path + "tnotname_u8.txt", "UTF-8");
    }

    // Main Chinese word dictionary.
    wordsLib = new HashMap();
    loadEntriesFromFile(wordsLib, "搜狗输入法词库.txt");
    System.out.println("Total keys: " + wordsLib.size());
}
/**
 * Loads one dictionary file into a set.
 *
 * <p>Each non-empty line that does not contain {@code '#'} is added to the
 * target set. Any failure is reported on standard error and otherwise
 * ignored (best-effort loading); the underlying stream is always closed.
 *
 * @param targetset   set receiving the entries
 * @param sourcefile  path of the dictionary file
 * @param charsetName name of the charset used to decode the file
 */
private static void loadSet(HashSet targetset, String sourcefile,
        String charsetName) {
    InputStream setdata = null;
    try {
        setdata = new FileInputStream(sourcefile);
        BufferedReader in = new BufferedReader(new InputStreamReader(
                setdata, charsetName));
        String dataline;
        while ((dataline = in.readLine()) != null) {
            // Skip blank lines and lines carrying a '#' comment marker.
            if ((dataline.indexOf("#") > -1) || (dataline.length() == 0)) {
                continue;
            }
            targetset.add(dataline.intern());
        }
    } catch (Exception e) {
        System.err.println("Exception loading data file: " + sourcefile + " "
                + e);
    } finally {
        // Release the file handle even when reading failed part-way through.
        if (setdata != null) {
            try {
                setdata.close();
            } catch (IOException e) {
                System.err.println("Exception loading data file: "
                        + sourcefile + " " + e);
            }
        }
    }
}
/**
 * Tells whether every character of the word is contained in the
 * Chinese-numeral set ({@code cnumbers}).
 *
 * @param testword word to check
 * @return {@code true} if all characters are in the numeral set (also for an
 *         empty word), {@code false} as soon as one character is not
 */
private static boolean isAllNumber(String testword) {
    int position = 0;
    while (position < testword.length()) {
        String single = testword.substring(position, position + 1).intern();
        if (!cnumbers.contains(single)) {
            return false;
        }
        position++;
    }
    return true;
}
/**
 * Tells whether the word starts with an entry of the surname set
 * ({@code csurname}), trying a one-character and then a two-character
 * prefix.
 *
 * @param testword word to check; {@code null} or empty yields {@code false}
 * @return {@code true} if the first character, or the first two characters,
 *         form an entry of the surname set
 */
private static boolean isStartWithChineseSurname(String testword) {
    // Guard: substring(0, 1) would throw on an empty string.
    if (testword == null || testword.length() == 0) {
        return false;
    }
    if (csurname.contains(testword.substring(0, 1).intern())) {
        return true;
    }
    // A two-character surname can only match when the word is long enough.
    return testword.length() >= 2
            && csurname.contains(testword.substring(0, 2).intern());
}
/**
 * Tells whether every character of the word is contained in the set of
 * characters used for foreign names ({@code cforeign}), e.g. "托马斯".
 *
 * @param testword word to check
 * @return {@code true} if all characters are in the foreign-name set (also
 *         for an empty word), {@code false} otherwise
 */
private static boolean isAllForeign(String testword) {
    final int length = testword.length();
    for (int start = 0; start < length; start++) {
        String oneChar = testword.substring(start, start + 1);
        boolean known = cforeign.contains(oneChar.intern());
        if (!known) {
            return false;
        }
    }
    return true;
}
/**
* 实现对cline中的中文进行分词,生成的结果以separator分割(注意:这里去停用词)
*
* @param cLine
* @param separator
* @return
*/
public static String segmentString(String cLine, String separator) {
StringBuffer currentWord = new StringBuffer(); // 当前新生成的词
StringBuffer outLine = new StringBuffer(); // 分完词的输出
int i, cLength;
char currentChar;
// separator = " ";
cLength = cLine.length();
for (i = 0; i < cLength; i++) {
currentChar = cLine.charAt(i); // 取出i位置的字符
if (Character.UnicodeBlock.of(currentChar) == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) { // 如果是中文字符
// Character in CJK block
if (currentWord.length() == 0) { // 当前词currentword中没有任何字符,start
// looking for next word
// System.err.println("current word length 0");
if (i > 0
&& (Character.isWhitespace(cLine.charAt(i - 1)) == false)) { // 防止加入多个separator
outLine.append(separator);
}
currentWord.append(currentChar); // 当前字符不是空格
} else { // 当前词currentword中已有了字符
String currentWordWithCurrentChar = new St
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
课程设计大学生,图书管理系统的实现。陈旧的图书馆不够现代化,为了迎接信息数据化的今天,开发一个系统化的图书信息管理方式,使图书馆的管理简单化、便捷化,通过java开发一个图书管理系统,从而实现图书馆的网络化、信息化,使得图书的查阅和借阅变得更加快捷、方便,有效提高图书馆的工作效率,同时也方便图书管理员对读者的信息进行管理。本项目是由我已经学习的java、数据库基础知识简单开发一个图书管理系统。根据需求,开发了两套系统,第一套系统:通过sqlserver数据库管理系统的连接交互对图书信息(图书编号、图书书名、图书价格)进行管理(增、删、改、查等);第二套系统:针对用户(学生、老师)实现借书、还书、查书等功能,以及相关文件的保存,数据的导出。本系统的最终用户是面向管理员(图书馆管理员和其他管 系统维护人员是计算机专业人员,熟悉操作系统和数据库,是间隔性用户。 图书管理系统有两类用户分别为管理员、读者,其中读者分为教师和学生。管理员通过认证后进入系统,管理员可以完成维护系统的功能,包括管理图书和管理读者。读者则可以查询图书、修改个人资料、查询个人借阅信息、归还图书的功能,完成后可以退出系统。
资源推荐
资源详情
资源评论
收起资源包目录
图书借阅管理系统全源码 (195个子文件)
Segmenter.class 8KB
Segmenter.class 8KB
ConnectDB.class 6KB
ConnectDB.class 6KB
ModifyBook.class 5KB
ModifyBook.class 5KB
BorrowTableModel.class 5KB
BorrowTableModel.class 5KB
ModifyReader.class 4KB
ModifyReader.class 4KB
ManagerRegister.class 4KB
BookTableModel.class 4KB
BookLend$listener.class 4KB
BookTableModel.class 4KB
ReaderRegister.class 4KB
BookBack$listener.class 4KB
ReaderTableModel.class 4KB
BookLend$listener.class 4KB
ManagerModify.class 4KB
ReaderTableModel.class 4KB
ManagerModify.class 4KB
BookBack$listener.class 4KB
ReaderModify.class 4KB
ReaderModify.class 4KB
ReaderpwModify.class 4KB
ReaderpwModify.class 4KB
ManagerpwModify.class 4KB
ManagerpwModify.class 4KB
ModifyRead.class 4KB
ModifyRead.class 4KB
ManagerEnter.class 4KB
ManagerRegister.class 4KB
ReaderEnter.class 4KB
ReaderEnter.class 3KB
ReaderMessage.class 3KB
ManagerFrame.class 3KB
ReaderMessage.class 3KB
ManagerFrame.class 3KB
AddBook.class 3KB
ReaderRegister.class 3KB
ManagerEnter.class 3KB
BookSeeking$listener.class 3KB
ManagerMessage.class 3KB
ManagerMessage.class 3KB
BookLend.class 3KB
ReaderFrame.class 3KB
BookSeeking.class 3KB
ReaderFrame.class 3KB
ManagerRegister$1.class 3KB
ManagerRegister$1.class 3KB
DataOperator.class 3KB
DataOperator.class 3KB
AddBook.class 3KB
BookLend.class 3KB
ReaderRegister$1.class 3KB
DeleteBook$listener.class 3KB
ReaderRegister$1.class 3KB
welcome.class 3KB
BookBack.class 3KB
ReaderSeeking.class 3KB
welcome.class 3KB
BookBack.class 3KB
DeleteBook.class 3KB
AddBook$listener.class 2KB
DeleteReader.class 2KB
AddBook$listener.class 2KB
BookSeeking$listener.class 2KB
DeleteReader$listener.class 2KB
DeleteBook.class 2KB
DeleteReader$listener.class 2KB
BookSeeking.class 2KB
AddReader.class 2KB
Search.class 2KB
Search.class 2KB
ReaderEnter$3.class 2KB
DeleteReader.class 2KB
DeleteBook$listener.class 2KB
ReaderEnter$3.class 2KB
ReaderSeeking$listener.class 2KB
ManagerEnter$2.class 2KB
ReaderSeeking.class 2KB
AddReader$listener.class 2KB
ManagerEnter$2.class 2KB
AddReader$listener.class 2KB
AddReader.class 2KB
ReaderSeeking$listener.class 2KB
ReaderEnter$1.class 1KB
ManagerMessage$listener.class 1KB
ReaderEnter$1.class 1KB
ReaderMessage$listener.class 1KB
ManagerMessage$listener.class 1KB
ManagerFrame$3.class 1KB
ManagerFrame$3.class 1KB
ReaderMessage$listener.class 1KB
ManagerFrame$2.class 1KB
ManagerFrame$2.class 1KB
ReaderFrame$2.class 1KB
ReaderFrame$1.class 1KB
ReaderFrame$2.class 1KB
ReaderFrame$1.class 1KB
共 195 条
- 1
- 2
资源评论
杭椒
- 粉丝: 1498
- 资源: 3
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功