package com.fendo.service.impl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.fendo.common.Page;
import com.fendo.entity.CsdnBlog;
import com.fendo.entity.News;
import com.fendo.mapper.CsdnBlogMapper;
import com.fendo.service.CsdnBlogService;
import com.google.gson.JsonObject;
import io.searchbox.client.JestClient;
import io.searchbox.core.*;
import io.searchbox.core.SearchResult.Hit;
import io.searchbox.indices.CreateIndex;
import io.searchbox.indices.DeleteIndex;
@Service
public class CsdnBlogServiceImpl implements CsdnBlogService{
/** log4j logger for this service class. */
public static final Logger LOGGER = Logger.getLogger(CsdnBlogServiceImpl.class);
/** Jest client injected by Spring; the methods below use it to execute bulk and search actions. */
@Autowired
private JestClient jestClient;
/**
 * Mapper injected by Spring; used below for insertSelective, selectByPrimaryKey and selectAll.
 * NOTE(review): the field name is capitalised like its type, so unqualified uses read like static calls.
 */
@Autowired
private CsdnBlogMapper CsdnBlogMapper;
/**
 * Record count quoted in the timing messages printed by createSearchIndex() and search(String).
 * It is a fixed label and is not derived from the data actually indexed or searched.
 */
private int num = 100000;
/**
 * Inserts the given blog record via {@code CsdnBlogMapper.insertSelective}.
 *
 * <p>Any exception raised by the insert is logged together with its stack trace and is
 * NOT rethrown, so the caller is never interrupted by a failed insert.
 *
 * @param csdnBlog the blog record to insert
 */
@Override
public void save(CsdnBlog csdnBlog) {
    try {
        // Entry trace goes through the logger instead of stdout so it obeys the log configuration.
        LOGGER.debug("jinlai");
        CsdnBlogMapper.insertSelective(csdnBlog);
        LOGGER.info("插入数据成功!!!");
    } catch (Exception e) {
        // Pass the throwable as the second argument: error(e) alone would only log
        // e.toString() and lose the stack trace.
        LOGGER.error("Failed to insert CsdnBlog record", e);
    }
}
/**
 * Queries the mapper for the record with primary key 1 (the result is discarded), then
 * fetches all records and prints how many were returned to standard output.
 */
@Override
public void get() {
    CsdnBlogMapper.selectByPrimaryKey(1);
    final List<CsdnBlog> allBlogs = CsdnBlogMapper.selectAll();
    final int blogCount = allBlogs.size();
    System.out.println(blogCount);
}
/**
 * Sends one bulk request containing four index actions with empty sources to the
 * default index "article" / type "article", then prints the elapsed time to standard output.
 *
 * <p>Failures are logged and not rethrown. This covers both exceptions thrown while
 * executing the request and an unsuccessful result reported by the server.
 */
@Override
public void createSearchIndex() {
    long start = System.currentTimeMillis();
    try {
        // NOTE(review): the original comment here said "delete the index if it exists",
        // but no delete is issued; the code only submits the bulk request below.
        Bulk bulk = new Bulk.Builder()
                .defaultIndex("article")
                .defaultType("article")
                .addAction(Arrays.asList(
                        new Index.Builder("").build(),
                        new Index.Builder("").build(),
                        new Index.Builder("").build(),
                        new Index.Builder("").build()))
                .build();
        BulkResult result = jestClient.execute(bulk);
        // Jest reports server-side failures through the result object rather than by
        // throwing, so the outcome has to be checked explicitly.
        if (!result.isSucceeded()) {
            LOGGER.error("Bulk index request failed: " + result.getErrorMessage());
        }
    } catch (Exception e) {
        LOGGER.error("Bulk index request could not be executed", e);
    }
    long end = System.currentTimeMillis();
    System.out.println("创建索引时间:数据量是 " + num + "记录,共用时间 -->> " + (end - start) + " 毫秒");
}
/**
 * Searches index "csdnblog" (type "article") and returns the first page of up to 15 hits
 * mapped to {@link CsdnBlog}.
 *
 * <p>When {@code param} is non-blank it is applied as a query-string query. When it is
 * blank no query clause is set, so the request is unfiltered, as before. Hit details and
 * the elapsed time are printed to standard output.
 *
 * @param param query-string expression to search for; may be blank
 * @return the sources of the returned hits as {@code CsdnBlog} objects
 * @throws Exception if the Jest client fails to execute the request
 */
@Override
public List<CsdnBlog> search(String param) throws Exception {
    long start = System.currentTimeMillis();
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // Previously the argument was ignored and a second, hard-coded search for "java" was
    // executed and thrown away; use the caller's text and issue a single request instead.
    if (StringUtils.isNotBlank(param)) {
        searchSourceBuilder.query(QueryBuilders.queryStringQuery(param));
    }
    searchSourceBuilder.from(0);
    searchSourceBuilder.size(15);
    Search search = new Search.Builder(searchSourceBuilder.toString())
            .addIndex("csdnblog")
            .addType("article")
            .build();
    SearchResult result = jestClient.execute(search);
    List<Hit<CsdnBlog, Void>> hits = result.getHits(CsdnBlog.class);
    System.out.println("Size:" + hits.size());
    for (Hit<CsdnBlog, Void> hit : hits) {
        CsdnBlog csdnBlog = hit.source;
        System.out.println(hit.highlight);
        System.out.println(csdnBlog.getId());
    }
    long end = System.currentTimeMillis();
    System.out.println("在" + num + "条记录中,搜索新闻,共用时间 -->> " + (end - start) + " 毫秒");
    return result.getSourceAsObjectList(CsdnBlog.class);
}
/**
* 检索
*
* @param field
* @param queryString
* @param older
* @param pageNumber
* @param pageSize
* @return
* @throws Exception
*/
public Map<String, Object> search(String field, String queryString, String older, int pageNumber, int pageSize) throws Exception {
List<CsdnBlog> csdnblogs = new ArrayList<CsdnBlog>();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// 构建查询
if ("all".equals(field)) {
searchSourceBuilder.query(QueryBuilders.queryStringQuery(queryString));
} else {
searchSourceBuilder.query(QueryBuilders.termQuery(field, queryString));
// 设置排序
// searchSourceBuilder.sort(field, "asc".equals(older) ? SortOrder.ASC : SortOrder.DESC); // 设置排序字段及排序顺序
}
// 设置高亮字段
HighlightBuilder highlightBuilder = new HighlightBuilder().field("*").requireFieldMatch(false);
highlightBuilder.field("titles");//高亮title
highlightBuilder.preTags("<em>").postTags("</em>");//高亮标签
highlightBuilder.fragmentSize(200);//高亮内容长度
searchSourceBuilder.highlighter(highlightBuilder);
searchSourceBuilder.explain(true); //设置是否按查询匹配度排序
// 设置分页
searchSourceBuilder.from((pageNumber - 1) * pageSize);//设置起始页
searchSourceBuilder.size(pageSize);//设置页大小
System.out.println(searchSourceBuilder.highlighter());
Search search = new Search.Builder(searchSourceBuilder.toString())
.addIndex("csdnblog")// 索引名称
.build();
SearchResult result = jestClient.execute(search);
System.out.println("本次查询共查到:"+result.getTotal()+"篇文章!");
List<Hit<CsdnBlog,Void>> hitss = result.getHits(CsdnBlog.class);
System.out.println("---------"+hitss);
// for (Hit<CsdnBlog, Void> hit : hitss) {
// CsdnBlog source = hit.source;
//
// //获取高亮后的内容
// Map<String, List<String>> highlight = hit.highlight;
// List<String> titlelist = highlight.get("titles");//高亮后的title
// if(titlelist!=null){
// source.setTitles(titlelist.get(0));
// }
// List<String> contentlist = highlight.get("content");//高亮后的content
// if(contentlist!=null){
// source.setContent(contentlist.get(0));
// }
// System.out.println("标题:"+source.getTitles());
// System.out.println("内容:"+source.getContent());
// System.out.println("url:"+source.getUrl());
// }
// 自动解析
// parseSearchResult(articles, result);
// 手动解析
JsonObject jsonObject = result.getJsonObject();
JsonObject hitsobject = jsonObject.getAsJsonObject("hits");
System.out.println(hitsobject);
long took = jsonObject.get("took").getAsLong();
long total = hitsobject.get("total").getAsLong();
List<SearchResult.Hit<CsdnBlog, Void>> hits = result.getHits(CsdnBlog.class);
System.out.println(hitsobject);
for (SearchResult.Hit<CsdnBlog, Void> hit : hits) {
CsdnBlog source = hit.source;
//获取高亮后的内容
System.out.println("-------------"+hit.source);
Map<String, List<String>> highlight = hit.highlight;
System.out.println("-----"+hit.highlight);
System.out.println("-----------"+highlight);
List<String> titlelist = highlight.get("titles");//高亮后的
没有合适的资源?快使用搜索试试~ 我知道了~
Webmagic爬取数据导入到ES
共106个文件
class:29个
java:25个
xml:15个
3星 · 超过75%的资源 需积分: 49 44 下载量 124 浏览量
2017-09-06
15:18:41
上传
评论 3
收藏 3.46MB ZIP 举报
温馨提示
Webmagic爬取数据导入到数据库与Elasticsearch5,详细介绍请参考:http://blog.csdn.net/u011781521/article/details/77866642
资源推荐
资源详情
资源评论
收起资源包目录
Webmagic爬取数据导入到ES (106个子文件)
Elasticserach_transportClient.class 19KB
Elasticserach_jestClient.class 13KB
CsdnBlogServiceImpl.class 10KB
CSDNPageProcessor.class 9KB
ClientFactory.class 5KB
ElasticsearchUtils.class 5KB
Elasticsearch_restClient.class 4KB
CsdnBlog.class 4KB
Page.class 2KB
JdbcPipeline.class 2KB
CsdnBlogSearchController.class 2KB
SwaggerConfig.class 2KB
PersonController.class 2KB
Person.class 2KB
SpringConfiguration.class 2KB
Elasticsearch_template.class 1KB
ClientFactory$1.class 1KB
News.class 984B
PersonServiceImpl.class 975B
JestController.class 906B
CsdnBlogService.class 633B
CommonUtils.class 618B
TransportController.class 556B
NodeController.class 541B
CsdnBlogMapper.class 514B
BaseController.class 297B
PersonMapper.class 283B
PersonService.class 282B
Utils.class 274B
.classpath 1KB
org.eclipse.wst.common.component 704B
org.eclipse.wst.jsdt.ui.superType.container 49B
bootstrap.css 143KB
bootstrap.min.css 118KB
bootstrap-theme.css 26KB
bootstrap-theme.min.css 23KB
glyphicons-halflings-regular.eot 20KB
CsdnBlogServiceImpl.java 9KB
CSDNPageProcessor.java 8KB
ClientFactory.java 4KB
ElasticsearchUtils.java 4KB
Page.java 3KB
CsdnBlog.java 3KB
SwaggerConfig.java 2KB
CsdnBlogSearchController.java 2KB
JdbcPipeline.java 2KB
Person.java 1KB
SpringConfiguration.java 1KB
JestController.java 1KB
PersonController.java 1KB
CsdnBlogService.java 695B
News.java 628B
PersonServiceImpl.java 600B
CsdnBlogMapper.java 498B
NodeController.java 409B
CommonUtils.java 333B
TransportController.java 320B
PersonMapper.java 198B
PersonService.java 193B
Utils.java 128B
BaseController.java 63B
BusinessException.java 31B
bootstrap.js 68KB
bootstrap.min.js 36KB
npm.js 484B
.jsdtscope 572B
search.jsp 4KB
meta.jsp 1KB
index.jsp 740B
person.jsp 545B
test.log 16.14MB
bootstrap.min.css.map 529KB
bootstrap.css.map 380KB
bootstrap-theme.css.map 47KB
bootstrap-theme.min.css.map 25KB
MANIFEST.MF 39B
org.eclipse.wst.jsdt.ui.superType.name 6B
org.eclipse.jdt.core.prefs 736B
org.eclipse.core.resources.prefs 101B
org.eclipse.m2e.core.prefs 90B
.project 1KB
log4j.properties 351B
log4j.properties 351B
db.properties 255B
db.properties 255B
glyphicons-halflings-regular.svg 106KB
glyphicons-halflings-regular.ttf 44KB
glyphicons-halflings-regular.woff 23KB
glyphicons-halflings-regular.woff2 18KB
pom.xml 8KB
CsdnBlogMapper.xml 6KB
CsdnBlogMapper.xml 6KB
applicationContext.xml 4KB
applicationContext.xml 4KB
springmvc.xml 2KB
springmvc.xml 2KB
web.xml 1KB
log4j2.xml 742B
log4j2.xml 742B
myBatisConfig.xml 430B
共 106 条
- 1
- 2
资源评论
- suifengoffice2018-11-19谢谢分享!
- fjzzmike2017-09-18谢谢分享!
码农致富
- 粉丝: 3672
- 资源: 112
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功