package com.wangyang.docIndex;
import com.wangyang.entity.UrlClick;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.queries.CustomScoreProvider;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.*;
@Component
public class Searcher {
// Per-field boosts handed to the MultiFieldQueryParser in init(); all fields are currently weighted equally.
private static final float TITLE_BOOST = 1F;
private static final float DESCRI_BOOST = 1F;
private static final float TEXT_BOOST = 1F;
// Index fields the query parser searches. init() maps boosts by position, so the order matters.
private static final String[] fields = {"text","title","description"};
// Filesystem location of the Lucene index opened in init().
private static final String indexDir = "/home/mysola/IdeaProjects/indexs/";
// Searcher built on top of indexReader in init().
private IndexSearcher indexSearcher;
// Reuse the IndexReader
private IndexReader indexReader;
// Parser created once in init() and used by buildQuery().
private QueryParser queryParser;
// Looked up by document URL inside MyScoreProvider.customScore().
private Map<String,Double> pageRank;
// Click records looked up by document URL; either map may be left null (customScore() checks for that).
private Map<String,UrlClick> anonymousUrlClick;
private Map<String,UrlClick> realNameUrlClick;
// One entry per index leaf: the leaf's "url" sorted doc values (filled by initDocUrlValues()).
private SortedDocValues[] docUrlValues;
// One entry per index leaf: the leaf's docBase, in the same order as docUrlValues.
private int[] baseDocs;
/**
 * Installs the table that custom scoring consults for a document's page-rank value.
 *
 * @param pageRank map keyed by document URL; the reference is stored as-is, not copied
 */
public void setPageRank(final Map<String, Double> pageRank) {
    this.pageRank = pageRank;
}
/**
 * Installs the click records used for the anonymous-click part of the custom score.
 *
 * @param anonymousUrlClick map keyed by document URL; may be {@code null}, in which case
 *                          the anonymous-click contribution to the score is zero
 */
public void setAnonymousUrlClick(final Map<String, UrlClick> anonymousUrlClick) {
    this.anonymousUrlClick = anonymousUrlClick;
}
/**
 * Installs the click records used for the real-name-click part of the custom score.
 *
 * @param realNameUrlClick map keyed by document URL; may be {@code null}, in which case
 *                         the real-name-click contribution to the score is zero
 */
public void setRealNameUrlClick(final Map<String, UrlClick> realNameUrlClick) {
    this.realNameUrlClick = realNameUrlClick;
}
/**
 * Opens the index under {@code indexDir}, creates the searcher and the multi-field
 * query parser, and caches the per-leaf "url" doc values.
 *
 * @throws IOException if the index directory or its doc values cannot be opened
 */
@PostConstruct
public void init() throws IOException {
    indexReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexDir)));
    indexSearcher = new IndexSearcher(indexReader);

    // Boosts are paired with the entries of `fields` by position: text, title, description.
    final Map<String, Float> fieldBoosts = new HashMap<>(fields.length);
    fieldBoosts.put(fields[0], TEXT_BOOST);
    fieldBoosts.put(fields[1], TITLE_BOOST);
    fieldBoosts.put(fields[2], DESCRI_BOOST);
    queryParser = new MultiFieldQueryParser(fields, LuceneUtil.getAnalyzer(), fieldBoosts);

    initDocUrlValues();
}
/**
 * Fills {@code docUrlValues} and {@code baseDocs} with one entry per leaf of the index
 * reader: the leaf's "url" sorted doc values and the leaf's doc base.
 *
 * @throws IOException if the doc values of a leaf cannot be read
 */
private void initDocUrlValues() throws IOException {
    final List<LeafReaderContext> leaves = indexSearcher.getIndexReader().leaves();
    final int leafCount = leaves.size();
    docUrlValues = new SortedDocValues[leafCount];
    baseDocs = new int[leafCount];

    int slot = 0;
    for (LeafReaderContext leaf : leaves) {
        docUrlValues[slot] = DocValues.getSorted(leaf.reader(), "url");
        baseDocs[slot] = leaf.docBase;
        slot++;
    }
}
/**
 * Resolves the "url" doc value of a document given its index-wide doc id.
 *
 * @param docID index-wide (top-level) document id
 * @return the URL decoded as UTF-8, or {@code null} when the stored value is missing or empty
 */
private String getDocUrlFromDocValues(int docID) {
    // Pick the last leaf whose doc base does not exceed docID.
    int leaf = 0;
    for (int next = 1; next < baseDocs.length && baseDocs[next] <= docID; next++) {
        leaf = next;
    }

    // Doc values are addressed by leaf-local id, hence the subtraction of the leaf's base.
    final BytesRef rawUrl = docUrlValues[leaf].get(docID - baseDocs[leaf]);
    if (rawUrl == null || rawUrl.length == 0) {
        return null;
    }
    return rawUrl.utf8ToString();
}
/**
 * Parses the user's query string and wraps the parsed query in a
 * {@code MyCustomScoreQuery}.
 *
 * NOTE(review): the single {@code queryParser} instance is shared by every call; Lucene's
 * classic QueryParser is presumably not safe for concurrent use — confirm how this bean is called.
 *
 * @param queryStr raw query text in classic query-parser syntax
 * @return the parsed query wrapped for custom scoring
 * @throws ParseException if {@code queryStr} cannot be parsed
 */
private Query buildQuery(String queryStr) throws ParseException {
    return new MyCustomScoreQuery(queryParser.parse(queryStr));
}
/**
 * Runs a search and returns one page (up to 10 hits) of highlighted results.
 *
 * <p>The returned array holds exactly the hits of the requested page, so it is shorter
 * than 10 on the last page and empty when the page lies beyond the last hit; it never
 * contains {@code null} elements.
 *
 * @param queryStr raw query text in classic query-parser syntax
 * @param pageNum  1-based page number
 * @return the results of the requested page, or {@code null} when the search fails
 *         (unparsable query, I/O error, or a page number below 1, which the searcher rejects)
 */
public SearchResult[] search(String queryStr, int pageNum) {
    try {
        Query query = buildQuery(queryStr);
        TopDocs topDocs = indexSearcher.search(query, pageNum * 10);

        QueryScorer scorer = new QueryScorer(query);
        // The closing tag must close both elements opened by the opening tag.
        SimpleHTMLFormatter formatter =
                new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

        // Size the page from the hits actually returned, not from the total hit count.
        ScoreDoc[] hits = topDocs.scoreDocs;
        int from = (pageNum - 1) * 10;
        int to = Math.min(pageNum * 10, hits.length);
        SearchResult[] results = new SearchResult[Math.max(0, to - from)];

        for (int i = from; i < to; i++) {
            int docId = hits[i].doc;
            Document document = indexSearcher.doc(docId);
            String url = getDocUrlFromDocValues(docId);

            String title = document.get("title");
            String highLightTitle = bestFragment(highlighter, "title", title);
            if (highLightTitle == null) {
                highLightTitle = title;
            }

            // Prefer a snippet from the body text; otherwise use the description,
            // highlighted when possible and plain when nothing in it matches.
            String highLightContent = bestFragment(highlighter, "text", document.get("text"));
            if (highLightContent == null) {
                String description = document.get("description");
                highLightContent = bestFragment(highlighter, "description", description);
                if (highLightContent == null) {
                    highLightContent = description;
                }
            }

            results[i - from] = new SearchResult(highLightTitle, highLightContent, queryStr, url);
        }
        return results;
    } catch (Exception e) {
        // Best effort: callers receive null on any failure; the cause is only printed.
        e.printStackTrace();
    }
    return null;
}

/**
 * Returns the best highlighted fragment of {@code text} for the given field, or
 * {@code null} when {@code text} is {@code null} or the highlighter yields nothing.
 */
private String bestFragment(Highlighter highlighter, String field, String text)
        throws IOException, InvalidTokenOffsetsException {
    if (text == null) {
        return null;
    }
    String fragment = highlighter.getBestFragment(LuceneUtil.getAnalyzer(), field, text);
    return (fragment == null || fragment.isEmpty()) ? null : fragment;
}
/**
 * Custom scoring implementation: blends the sub-query score with the document's
 * page-rank value and its anonymous / real-name click counts, all looked up by the
 * document's "url" doc value.
 */
private class MyScoreProvider extends CustomScoreProvider {
    private static final float QUERY_BOOST = 0.4f;
    private static final float PAGE_RANK_BOOST = 0.4f;
    private static final float ANONYMOUS_CLICK_BOOST = 0.1f;
    private static final float REAL_NAME_CLICK_BOOST = 0.1f;

    private final LeafReaderContext context;
    // "url" doc values of this leaf, resolved on first use and reused for later documents.
    private SortedDocValues urlValues;

    public MyScoreProvider(LeafReaderContext context) {
        super(context);
        this.context = context;
    }

    /**
     * Computes the weighted sum of the sub-query score, page rank and click counts.
     * A signal with no entry for the document's URL (or whose map has not been set)
     * contributes zero.
     *
     * @param doc           leaf-local document id
     * @param subQueryScore score produced by the wrapped query
     * @param valSrcScore   value-source score; not used here
     * @return the combined score
     * @throws IOException if the "url" doc values cannot be read
     */
    @Override
    public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException {
        if (urlValues == null) {
            urlValues = DocValues.getSorted(context.reader(), "url");
        }
        String url = urlValues.get(doc).utf8ToString();

        // Guard the page-rank lookup the same way as the click lookups below, so a URL
        // missing from the table does not abort the whole search with a NullPointerException.
        float prScore = 0f;
        if (pageRank != null) {
            Double rank = pageRank.get(url);
            if (rank != null) {
                prScore = rank.floatValue();
            }
        }

        int anonymousClickScore = 0;
        if (anonymousUrlClick != null) {
            UrlClick anonymousClick = anonymousUrlClick.get(url);
            if (anonymousClick != null) {
                anonymousClickScore = anonymousClick.getAnonymousClick();
            }
        }

        int realNameClickScore = 0;
        if (realNameUrlClick != null) {
            UrlClick realNameClick = realNameUrlClick.get(url);
            if (realNameClick != null) {
                realNameClickScore = realNameClick.getRealNameClick();
            }
        }

        return subQueryScore * QUERY_BOOST + prScore * PAGE_RANK_BOOST +
                anonymousClickScore * ANONYMOUS_CLICK_BOOST +
                realNameClickScore * REAL_NAME_CLICK_BOOST;
    }
}
/**
 * Overrides CustomScoreQuery's getCustomScoreProvider method
 * so that the custom provider is used.
 */
private class MyCustomScoreQuery extends CustomScoreQuery {
public MyCustomScoreQuery(Query subQuery) {
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
人工智能-项目实践-检索系统-旅游信息检索系统 (124个子文件)
bootstrap.css 143KB
bootstrap.min.css 118KB
bootstrap-theme.css 26KB
bootstrap-theme.min.css 23KB
search.css 298B
search.html 5KB
index.html 3KB
description.html 97B
graduation.iml 8KB
Searcher.java 8KB
Matrix.java 7KB
DocProcessor.java 5KB
NormalizedDocProcesser.java 5KB
UrlClickServiceImpl.java 4KB
Indexer.java 3KB
DocLocalizeUtil.java 2KB
PageRanker.java 2KB
SecurityConfiguration.java 2KB
PRResource.java 2KB
FlowController.java 2KB
NormalizedDoc.java 2KB
DispatchThread.java 1KB
CrawlLogReader.java 1KB
DataController.java 1KB
SearchResult.java 1KB
MyLogoutSucessHandler.java 1KB
LineInLog.java 913B
UrlClick.java 839B
MyAuthenticationSuccessHandler.java 795B
Test1.java 694B
LogUtil.java 652B
ComputeThread.java 548B
GraduationApp.java 481B
MyPasswordEncoder.java 390B
UrlClickDao.java 386B
LuceneUtil.java 261B
UserService.java 201B
jquery.min.js 82KB
bootstrap.js 68KB
jquery.form.js 47KB
bootstrap.min.js 36KB
npm.js 484B
hs_err_pid27396.log 49KB
hs_err_pid27504.log 49KB
bootstrap.min.css.map 529KB
bootstrap.css.map 380KB
bootstrap-theme.css.map 47KB
bootstrap-theme.min.css.map 25KB
log4j.properties 798B
application.properties 430B
uiDesigner.xml 9KB
pom.xml 4KB
Maven__org_springframework_boot_spring_boot_starter_security_2_0_1_RELEASE.xml 794B
Maven__org_springframework_boot_spring_boot_starter_logging_2_0_1_RELEASE.xml 787B
Maven__org_springframework_boot_spring_boot_starter_tomcat_2_0_1_RELEASE.xml 780B
Maven__org_springframework_boot_spring_boot_autoconfigure_2_0_1_RELEASE.xml 773B
Maven__org_mybatis_spring_boot_mybatis_spring_boot_autoconfigure_1_3_0.xml 769B
Maven__org_springframework_security_spring_security_config_5_0_4_RELEASE.xml 768B
Maven__com_fasterxml_jackson_module_jackson_module_parameter_names_2_9_5.xml 768B
Maven__org_springframework_boot_spring_boot_starter_jdbc_2_0_1_RELEASE.xml 766B
Maven__org_springframework_boot_spring_boot_starter_json_2_0_1_RELEASE.xml 766B
Maven__org_springframework_boot_spring_boot_starter_web_2_0_1_RELEASE.xml 759B
Maven__org_springframework_security_spring_security_core_5_0_4_RELEASE.xml 754B
Maven__org_springframework_security_spring_security_web_5_0_4_RELEASE.xml 747B
Maven__org_springframework_boot_spring_boot_devtools_2_0_1_RELEASE.xml 738B
Maven__org_springframework_boot_spring_boot_starter_2_0_1_RELEASE.xml 731B
Maven__org_mybatis_spring_boot_mybatis_spring_boot_starter_1_3_0.xml 727B
Maven__com_fasterxml_jackson_datatype_jackson_datatype_jsr310_2_9_5.xml 727B
Maven__com_fasterxml_jackson_datatype_jackson_datatype_jdk8_2_9_5.xml 713B
Maven__org_hibernate_validator_hibernate_validator_6_0_9_Final.xml 713B
Maven__org_apache_tomcat_embed_tomcat_embed_websocket_8_5_29.xml 699B
Maven__org_springframework_spring_expression_5_0_5_RELEASE.xml 697B
Maven__com_fasterxml_jackson_core_jackson_annotations_2_9_0.xml 683B
Maven__org_apache_lucene_lucene_analyzers_smartcn_6_6_1.xml 682B
Maven__org_apache_tomcat_embed_tomcat_embed_jasper_8_5_29.xml 678B
Maven__org_springframework_spring_context_5_0_5_RELEASE.xml 676B
Maven__org_apache_tomcat_tomcat_annotations_api_8_5_29.xml 675B
Maven__org_springframework_boot_spring_boot_2_0_1_RELEASE.xml 675B
Maven__org_apache_lucene_lucene_analyzers_common_6_6_1.xml 675B
Maven__org_springframework_spring_webmvc_5_0_5_RELEASE.xml 669B
Maven__org_apache_tomcat_embed_tomcat_embed_core_8_5_29.xml 664B
Maven__org_springframework_spring_beans_5_0_5_RELEASE.xml 662B
Maven__com_fasterxml_jackson_core_jackson_databind_2_9_5.xml 662B
Maven__org_springframework_spring_core_5_0_5_RELEASE.xml 655B
Maven__org_springframework_spring_jdbc_5_0_5_RELEASE.xml 655B
Maven__org_apache_tomcat_embed_tomcat_embed_el_8_5_29.xml 650B
Maven__javax_annotation_javax_annotation_api_1_3_2.xml 650B
Maven__javax_validation_validation_api_2_0_1_Final.xml 650B
Maven__org_springframework_spring_jcl_5_0_5_RELEASE.xml 648B
Maven__org_springframework_spring_aop_5_0_5_RELEASE.xml 648B
Maven__org_springframework_spring_web_5_0_5_RELEASE.xml 648B
Maven__org_jboss_logging_jboss_logging_3_3_2_Final.xml 647B
Maven__org_apache_logging_log4j_log4j_to_slf4j_2_10_0.xml 647B
Maven__org_springframework_spring_tx_5_0_5_RELEASE.xml 641B
Maven__org_apache_lucene_lucene_highlighter_6_6_1.xml 640B
Maven__org_apache_lucene_lucene_queryparser_6_6_1.xml 640B
Maven__com_fasterxml_jackson_core_jackson_core_2_9_5.xml 634B
compiler.xml 632B
Maven__mysql_mysql_connector_java_5_1_46.xml 613B
Maven__org_apache_lucene_lucene_queries_6_6_1.xml 612B
共 124 条
- 1
- 2
资源评论
博士僧小星
- 粉丝: 1921
- 资源: 5876
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
- 基于JavaScript讲解的数据结构和算法
- python计算机视觉python-computer-vision.rar
- VB+ACCESS计算机等级考试管理系统(源代码+系统+答辩PPT).zip
- python密码python-ciphers.rar
- 2c60fbb3dt9ad50ed8864298eea1484b.MP4
- 基于yolov8+dlib实现视觉识别的安全驾驶监测系统部署到jetson NX平台源码+模型.zip
- Qt框架+OpenCV+动态爱心+编程教学+520
- 基于opencv+yolov8实现目标追踪及驻留时长统计源码.zip
- 水稻病害基于Yolov8算法优化目标检测识别与AI辅助决策python源码+模型+使用说明.zip
- 海尔618算价表_七海5.20_16.00xlsx(1)(2).xlsx
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功