package utils;
import com.mada.pojo.*;
import org.apache.http.NameValuePair;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.safety.Whitelist;
import org.jsoup.select.Elements;
import javax.script.ScriptException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
/**
 * Shared helper methods used when crawling Douban Books.
 */
public class bookCommons {
/**
 * Downloads a Douban author page and extracts the author's profile from it.
 *
 * <p>The author id is the last path segment of {@code authorUrl}; a trailing slash is
 * optional. Sex, birth, area, avatar and biography are optional: when the page does not
 * contain them the corresponding field is left as an empty string instead of failing.
 *
 * @param authorUrl  URL of the author page whose last path segment is the numeric author id
 * @param httpClient client passed on to {@code commonUtils.sendGet} to perform the request
 * @return the populated {@code author} entity
 * @throws IOException           if the request or reading the response body fails, or the
 *                               page has no {@code #content h1} name heading
 * @throws NumberFormatException if the last path segment of {@code authorUrl} is not an integer
 */
public static author authorGet(String authorUrl, CloseableHttpClient httpClient) throws IOException {
    int aId = authorIdFromUrl(authorUrl);

    Document page;
    // Close the response once the body has been read so its connection is not leaked.
    try (CloseableHttpResponse authorResponse = commonUtils.sendGet(authorUrl, httpClient)) {
        page = Jsoup.parse(EntityUtils.toString(authorResponse.getEntity(), "UTF-8"));
    }

    // The name is the only mandatory field: without it the page is not a usable author page.
    Element content = page.getElementById("content");
    Element heading = content == null ? null : content.getElementsByTag("h1").first();
    if (heading == null) {
        throw new IOException("Author name heading not found on page: " + authorUrl);
    }
    String name = heading.text();

    String sex = "";
    String birth = "";
    String area = "";
    String aImagePath = "";
    String aboutAuthor = "";

    // Header node holding the avatar and the "label: value" fact list.
    Element info = page.getElementById("headline");
    Element picture = info == null ? null : info.select("div.pic").first();
    Element image = picture == null ? null : picture.getElementsByTag("img").first();
    if (image != null) {
        aImagePath = image.attr("src");
    } else {
        System.out.println("找不到头像" + authorUrl);
    }

    if (info != null) {
        for (Element infoEntity : info.getElementsByTag("li")) {
            String infoEntityString = infoEntity.text();
            if (infoEntityString.contains("性别")) {
                sex = authorInfoValue(infoEntityString);
            } else if (infoEntityString.contains("出生日期") || infoEntityString.contains("生卒日期")) {
                birth = authorInfoValue(infoEntityString);
            } else if (infoEntityString.contains("出生地") || infoEntityString.contains("国家/地区")) {
                area = authorInfoValue(infoEntityString);
            }
        }
    }

    // Biography node: prefer the "all hidden" span when present, otherwise use the "bd" div.
    Element intro = page.getElementById("intro");
    if (intro != null) {
        Element biography = intro.select("span[class='all hidden']").first();
        if (biography == null) {
            biography = intro.select("div[class='bd']").first();
        }
        if (biography != null) {
            aboutAuthor = authorPlainText(biography);
        }
    }

    author authorEntity = new author();
    authorEntity.setAId(aId);
    authorEntity.setName(name);
    authorEntity.setSex(sex);
    authorEntity.setBirth(birth);
    authorEntity.setArea(area);
    authorEntity.setAImagePath(aImagePath);
    authorEntity.setAboutAuthor(aboutAuthor);
    return authorEntity;
}

/** Parses the numeric id from the last path segment of a URL, with or without a trailing slash. */
private static int authorIdFromUrl(String url) {
    String trimmed = url.endsWith("/") ? url.substring(0, url.length() - 1) : url;
    return Integer.parseInt(trimmed.substring(trimmed.lastIndexOf('/') + 1));
}

/**
 * Returns the text after the first colon of a "label: value" entry, or an empty string when
 * there is no colon. Whitespace is kept as-is so values match what was stored before.
 */
private static String authorInfoValue(String entry) {
    int ascii = entry.indexOf(':');
    // Also accept the full-width colon in case the page uses it as the separator.
    int wide = entry.indexOf('：');
    int separator;
    if (ascii < 0) {
        separator = wide;
    } else if (wide < 0) {
        separator = ascii;
    } else {
        separator = Math.min(ascii, wide);
    }
    return separator < 0 ? "" : entry.substring(separator + 1);
}

/** Strips all markup from the inner HTML of {@code node} without pretty-printing the result. */
private static String authorPlainText(Element node) {
    return Jsoup.clean(node.html(), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
}
public static book bookGet(String bookUrl, CloseableHttpClient httpClient) throws URISyntaxException, IOException, ScriptException {
CloseableHttpResponse bookResponse, authorResponse;
int bId;
String isbn = "";
String name = "";
String coverPath = "";
String authorName = "";
String publishingHouse = "";
String publishingYear = "";
String labels = "";
String briefIntroduction = "这本书还没有介绍!";
List<String> authorUrlList = new ArrayList<>();
List<String> recommonedUrlList = new ArrayList<>();
float score;
int pointNumber;
bId = Integer.parseInt(bookUrl.substring(bookUrl.lastIndexOf("/", bookUrl.length() - 2) + 1, bookUrl.length() - 1));
//bookUrl="https://book.douban.com/subject/1084336/";
bookResponse = commonUtils.sendGet(bookUrl, httpClient);
// bookResponse=commonUtils.sendGet("https://book.douban.com/subject/1084336/",httpClient);
Document docBookTemp = Jsoup.parse(EntityUtils.toString(bookResponse.getEntity(), "UTF-8"));
String debug = docBookTemp.toString();
// writeStringToDisk.writeHtml(debug, "book" + bId);
try {
score = Float.parseFloat(docBookTemp.select("#interest_sectl div.rating_self.clearfix strong").get(0).text().trim());
pointNumber = Integer.parseInt(docBookTemp.select("#interest_sectl > div > div.rating_self.clearfix > div > div.rating_sum > span > a > span").get(0).text().trim());
} catch (Exception e) {
score = 0;
pointNumber = 0;
// System.out.println("书籍信息不足:" + bookUrl);
// System.out.println(e);
// return null;
}
Element mainpic = docBookTemp.getElementById("mainpic");//书籍图片信息节点
Element info = docBookTemp.getElementById("info");//书籍信息节点
coverPath = mainpic.child(0).attr("href");//书籍封面url;
name = docBookTemp.select("#wrapper>h1").text();
//处理作者信息
//可能会出现多个作者,有的是作者的直接页面,有的不是直接的作何页面,而是作者的search,
Elements authorUrls = info.getElementsByTag("a");//id为info的节点下第一个a标签必为作者标签,不过当链接中有search时不是直接的作者页面,需再处理,只有链接中含有author时才是直接的作者页面
boolean flag = false;//记录是否搜索到作者链接
URI base = new URI(bookUrl);//处理链接中的相对路径
for (Element authorE : authorUrls) {
String authorUrl = authorE.attr("href");
if (authorUrl.contains("author"))//直接是作者页面
{
flag = true;
authorUrlList.add(base.resolve(authorUrl).toString());
} else if (false && authorUrl.contains("search"))//得到搜索页面
{
authorResponse = commonUtils.sendGet(base.resolve(authorUrl).toString(), null);
if (authorResponse.getStatusLine().getStatusCode() == 302) {
String redirectUrl = authorResponse.getHeaders("Location")[0].getValue().replace(" ", "");
authorResponse = commonUtils.sendGet(redirectUrl, null);
}
Document authorSearchTemp = Jsoup.parse(EntityUtils.toString(authorResponse.getEntity(), "UTF-8"));
//解密
Elements scripts = authorSearchTemp.getElementsByTag("script");
String windowDATA = "";
for (Element script : scripts) {
if (script.toString().contains("window.__DATA__")) {
windowDATA = script.toString().replaceAll("\\s+", "").replaceAll("<scripttype=\"text/javascript\">window.__DATA__=", "").replaceAll("window.__USER__=\\{\\}</script>", "");
}
}
String[] searchResultArray = {};
try {
List<Nam
没有合适的资源?快使用搜索试试~ 我知道了~
协同过滤服务+源代码+文档说明
共198个文件
jar:64个
class:60个
xml:34个
1.该资源内容由用户上传,如若侵权请联系客服进行举报
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
版权申诉
0 下载量 120 浏览量
2024-06-06
17:33:50
上传
评论
收藏 40.17MB ZIP 举报
温馨提示
- 不懂运行,下载完可以私聊问,可远程教学 该资源内项目源码是个人的毕设,代码都测试ok,都是运行成功后才上传资源,答辩评审平均分达到96分,放心下载使用! <项目介绍> 1、该资源内项目代码都经过测试运行成功,功能ok的情况下才上传的,请放心下载使用! 2、本项目适合计算机相关专业(如计科、人工智能、通信工程、自动化、电子信息等)的在校学生、老师或者企业员工下载学习,也适合小白学习进阶,当然也可作为毕设项目、课程设计、作业、项目初期立项演示等。 3、如果基础还行,也可在此代码基础上进行修改,以实现其他功能,也可用于毕设、课设、作业等。 下载后请首先打开README.md文件(如有),仅供学习参考, 切勿用于商业用途。 --------
资源推荐
资源详情
资源评论
收起资源包目录
协同过滤服务+源代码+文档说明
(198个子文件)
bookCommons.class 12KB
bookCommons.class 12KB
bookCommons.class 12KB
book.class 7KB
book.class 7KB
book.class 7KB
commonUtils.class 6KB
commonUtils.class 6KB
commonUtils.class 6KB
user.class 5KB
user.class 5KB
user.class 5KB
user.class 4KB
user.class 4KB
user.class 4KB
author.class 4KB
author.class 4KB
author.class 4KB
login.class 4KB
login.class 4KB
login.class 4KB
book.class 3KB
book.class 3KB
book.class 3KB
response.class 3KB
response.class 3KB
response.class 3KB
comment.class 2KB
comment.class 2KB
comment.class 2KB
readingRecord.class 2KB
readingRecord.class 2KB
readingRecord.class 2KB
message.class 2KB
message.class 2KB
message.class 2KB
loadData.class 2KB
loadData.class 2KB
loadData.class 2KB
userService.class 2KB
userService.class 2KB
userService.class 2KB
myExceptionHandler.class 2KB
myExceptionHandler.class 2KB
myExceptionHandler.class 2KB
bookService.class 1KB
bookService.class 1KB
bookService.class 1KB
writeStringToDisk.class 1KB
writeStringToDisk.class 1KB
writeStringToDisk.class 1KB
test.class 1KB
test.class 1KB
test.class 1KB
userMapper.class 897B
userMapper.class 897B
userMapper.class 897B
bookMapper.class 786B
bookMapper.class 786B
bookMapper.class 786B
setMessage.html 2KB
setMessage.html 2KB
setMessage.html 2KB
CollaborativeFiltering.iml 924B
CollaborativeFiltering.iml 829B
aspectjweaver-1.8.9.jar 1.78MB
aspectjweaver-1.8.9.jar 1.78MB
jackson-databind-2.10.0.jar 1.34MB
jackson-databind-2.10.0.jar 1.34MB
spring-context-4.2.6.RELEASE.jar 1.05MB
spring-context-4.2.6.RELEASE.jar 1.05MB
spring-core-4.2.6.RELEASE.jar 1.05MB
spring-core-4.2.6.RELEASE.jar 1.05MB
mysql-connector-java-5.1.47.jar 984KB
mysql-connector-java-5.1.47.jar 984KB
spring-webmvc-4.2.6.RELEASE.jar 857KB
spring-webmvc-4.2.6.RELEASE.jar 857KB
spring-web-4.2.6.RELEASE.jar 749KB
spring-web-4.2.6.RELEASE.jar 749KB
httpclient-4.5.6.jar 749KB
httpclient-4.5.6.jar 749KB
spring-beans-4.2.6.RELEASE.jar 714KB
spring-beans-4.2.6.RELEASE.jar 714KB
mybatis-3.1.1.jar 636KB
mybatis-3.1.1.jar 636KB
spring-test-4.2.6.RELEASE.jar 549KB
spring-test-4.2.6.RELEASE.jar 549KB
log4j-1.2.17.jar 478KB
log4j-1.2.17.jar 478KB
commons-lang3-3.4.jar 424KB
commons-lang3-3.4.jar 424KB
spring-jdbc-4.2.6.RELEASE.jar 414KB
spring-jdbc-4.2.6.RELEASE.jar 414KB
jstl-1.2.jar 405KB
jstl-1.2.jar 405KB
jsoup-1.11.3.jar 386KB
jsoup-1.11.3.jar 386KB
spring-aop-4.2.6.RELEASE.jar 362KB
spring-aop-4.2.6.RELEASE.jar 362KB
jackson-core-2.10.0.jar 340KB
共 198 条
- 1
- 2
资源评论
机器学习的喵
- 粉丝: 1955
- 资源: 2067
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功