package com.engine.lucene.query;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.mira.lucene.analysis.IK_CAnalyzer;
import com.common.context.AppContext;
import com.engine.lucene.process.LoadIndexDir;
import com.knowledge.po.Knowledge;
/**
 * Static helpers for querying the Lucene index whose location is supplied by
 * {@link LoadIndexDir#getINDEX_STORE_PATH()}.
 *
 * <p>All diagnostics are written to standard output / standard error, matching
 * the rest of this class; no exception is propagated to callers.
 */
public class Search {

    /**
     * Searches the "content" field for {@code keyword} and resolves every hit to
     * its database record through the "uid" field stored in the index.
     *
     * @param keyword query string in Lucene query-parser syntax; {@code null}
     *                or blank yields an empty result
     * @return list of {@link Knowledge} records for the hits, in hit order;
     *         hits whose record cannot be loaded are skipped. Never {@code null};
     *         empty when the query cannot be parsed or the index cannot be read.
     */
    public static List searchFiles(String keyword) {
        // Collects the database records that correspond to the index hits.
        List hitsList = new ArrayList();
        if (keyword == null || keyword.trim().length() == 0) {
            // Nothing to parse; avoid opening the index (and an NPE inside the parser).
            return hitsList;
        }
        Searcher searcher = null;
        try {
            // Location of the index files.
            String indexStorePath = new LoadIndexDir().getINDEX_STORE_PATH();
            long start = System.currentTimeMillis();
            searcher = new IndexSearcher(indexStorePath);
            QueryParser parser = new QueryParser("content", new IK_CAnalyzer());
            // The parser turns e.g. space-separated keywords into a multi-clause query.
            Query query = parser.parse(keyword);
            Hits hits = searcher.search(query);
            for (int i = 0; i < hits.length(); i++) {
                // "uid" holds the primary key of the knowledge table.
                Knowledge knowledge = loadKnowledge(hits.doc(i).get("uid"));
                // A missing record means it no longer exists in the database; skip it.
                if (knowledge != null) {
                    hitsList.add(knowledge);
                }
            }
            System.out.println("search files: "
                    + (System.currentTimeMillis() - start)
                    + " total milliseconds");
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass()
                    + "\n with message: " + e.getMessage());
        } catch (ParseException e) {
            System.out.println(" caught a " + e.getClass()
                    + "\n with message: " + e.getMessage());
        } finally {
            // Release the index even when parsing or searching failed.
            closeQuietly(searcher);
        }
        return hitsList;
    }

    /**
     * Searches the "content" field for {@code key} and replaces the "content"
     * field of every in-memory hit document with a highlighted excerpt (up to
     * three best fragments joined by "...").
     *
     * <p>The searcher backing the returned {@link Hits} is deliberately left
     * open when the search succeeds, because {@code Hits} fetches documents
     * from its searcher on demand; closing it here would break callers that
     * iterate the result.
     *
     * @param key query string in Lucene query-parser syntax
     * @return the hits, or {@code null} if the search itself failed
     */
    public static Hits doSearch(String key) {
        Hits hits = null;
        Searcher search = null;
        try {
            // Location of the index files.
            String indexStorePath = new LoadIndexDir().getINDEX_STORE_PATH();
            search = new IndexSearcher(indexStorePath);
            String fields[] = {"content" };
            QueryParser parser = new MultiFieldQueryParser(fields, new IK_CAnalyzer());
            Query query = parser.parse(key);
            hits = search.search(query);

            // Loop-invariant highlighting machinery, built once per search.
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(
                    "<b><font color='red'>", "</font></b>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
            StandardAnalyzer highlightAnalyzer = new StandardAnalyzer();

            for (int i = 0; i < hits.length(); i++) {
                Document doc = hits.doc(i);
                String text = doc.get("content");
                if (text == null) {
                    // No "content" value available on this hit: nothing to highlight.
                    continue;
                }
                TokenStream tokenStream = highlightAnalyzer.tokenStream(
                        "content", new StringReader(text));
                // Get 3 best fragments and separate them with "..."
                String result = highlighter.getBestFragments(tokenStream, text,
                        3, "...");
                System.out.println(result);
                // Swap the original content for the highlighted excerpt.
                Field highlighted = new Field("content", result, Field.Store.NO,
                        Field.Index.TOKENIZED, Field.TermVector.YES);
                doc.removeField("content");
                doc.add(highlighted);
            }
        } catch (Exception e) {
            e.printStackTrace();
            if (hits == null) {
                // The search never produced a result, so nobody needs the searcher.
                closeQuietly(search);
            }
        }
        // hits is null when the search failed; report zero instead of throwing.
        System.out.println("搜索结果:" + (hits == null ? 0 : hits.length()));
        return hits;
    }

    /**
     * Prints, for every document containing the exact term
     * ({@code searchType}, {@code searchKey}), the term frequency followed by
     * the document, then the elapsed time of the enumeration.
     *
     * @param searchType name of the field to look in
     * @param searchKey  exact term text to look for
     */
    public static void indexSearch(String searchType, String searchKey) {
        IndexSearcher searcher = null;
        TermDocs termDocs = null;
        try {
            String indexStorePath = new LoadIndexDir().getINDEX_STORE_PATH();
            // Open a searcher on the index location.
            searcher = new IndexSearcher(indexStorePath);
            // searchType is the field to search, searchKey the keyword.
            Term term = new Term(searchType, searchKey);
            long beginTime = System.currentTimeMillis();
            IndexReader reader = searcher.getIndexReader();
            // Enumeration of <document, frequency> pairs for the term.
            termDocs = reader.termDocs(term);
            while (termDocs.next()) {
                // Number of occurrences of the term in the document.
                System.out.println(termDocs.freq());
                // The document containing the term.
                System.out.println(reader.document(termDocs.doc()));
            }
            long timeOfSearch = System.currentTimeMillis() - beginTime;
            System.out
                    .println("The time For indexsearch is " + timeOfSearch + " ms");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (termDocs != null) {
                try {
                    termDocs.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            closeQuietly(searcher);
        }
    }

    /**
     * Loads the record with the given primary key, treating any failure as
     * "record not available".
     *
     * @param id primary key as stored in the index, may be {@code null}
     * @return the record, or {@code null} if it is missing or cannot be loaded
     */
    private static Knowledge loadKnowledge(String id) {
        if (id == null) {
            return null;
        }
        try {
            return AppContext.getKnowledgeDAOProxy().findKnowledgeById(Long.valueOf(id));
        } catch (Exception e) {
            // Best effort: one unreadable record must not fail the whole search,
            // but leave a trace instead of swallowing the failure silently.
            System.out.println(" caught a " + e.getClass()
                    + "\n with message: " + e.getMessage());
            return null;
        }
    }

    /** Closes {@code searcher} if it is non-null, reporting (not throwing) any I/O failure. */
    private static void closeQuietly(Searcher searcher) {
        if (searcher == null) {
            return;
        }
        try {
            searcher.close();
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass()
                    + "\n with message: " + e.getMessage());
        }
    }
}
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
lucene用java索引创建,搜索.rar (45个子文件)
lucene用java索引创建,搜索
src
log4j.properties 2KB
cacheContext.xml 2KB
prop.properties 105B
com
common
cache
MethodCacheInterceptor.java 3KB
MethodCacheAfterAdvice.java 1KB
context
AppContext.java 1KB
web
tools
IQuery.java 506B
ListQuery.java 1KB
PageList.java 2KB
IPageList.java 579B
util
AbstractManager.java 176B
ReadConfiguretion.java 1KB
engine
test
Test.java 3KB
lucene
query
IndexQueryResult.java 3KB
HitsQuery.java 1KB
Search.java 6KB
MyRssSearch.java 2KB
process
IndexProcesser.java 6KB
updateIndexProcesser.java 690B
delIndexProcesser.java 3KB
DbIndexProcesser.java 4KB
LoadIndexDir.java 1KB
format
jacob
WordReader.java 1KB
WordReader.class 2KB
split
FilePreprocess.java 3KB
pdfbox
PdfLuceneTest.java 1KB
PdfboxTest.java 3KB
PdfLuceneTest.class 2KB
PdfboxTest.class 3KB
poi
WordReader.java 802B
ExcelReader.class 5KB
ExcelReader.java 5KB
WordReader.class 1KB
xpdf
Pdf2TextTest.java 432B
Pdf2Text.class 3KB
Pdf2Text.java 3KB
Pdf2TextTest.class 757B
knowledge
bo
SearchContent.java 934B
po
Knowledge.hbm.xml 694B
Knowledge.java 658B
service
KnowledgeDAO.java 268B
KnowledgeDAOImpl.java 691B
jdbc.properties 189B
applicationContext.xml 3KB
ehcache.xml 536B
共 45 条
- 1
资源评论
- scarlett_xxh 2014-06-28:是xml的源代码。。没看清 本来想下java的。。有点看不懂
小小马过河
- 粉丝: 12
- 资源: 81
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功