package utils;
import com.mada.pojo.*;
import org.apache.http.NameValuePair;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.safety.Whitelist;
import org.jsoup.select.Elements;
import javax.script.ScriptException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
/**
 * Shared helper methods used when crawling Douban Books (豆瓣读书).
 */
public class bookCommons {
/**
 * Downloads a Douban author page and scrapes it into an {@code author} entity.
 *
 * <p>The numeric author id is the last path segment of {@code authorUrl}. The page body is
 * decoded as UTF-8 and parsed with jsoup. The avatar, gender, birth date, region and
 * biography are optional: when the corresponding node is absent the field is left as the
 * empty string instead of aborting the whole scrape. The author name is still required;
 * a page without a {@code #content h1} element fails with an unchecked exception, exactly
 * as before.
 *
 * @param authorUrl  URL of the author page, e.g. one ending in {@code /author/123/};
 *                   the trailing slash is optional
 * @param httpClient client passed through to {@code commonUtils.sendGet}
 * @return the populated author entity
 * @throws IOException           if reading the response body or closing the response fails
 * @throws NumberFormatException if the last path segment of {@code authorUrl} is not an integer
 */
public static author authorGet(String authorUrl, CloseableHttpClient httpClient) throws IOException {
    // Parse the id first so a malformed URL fails before any request is sent.
    int aId = parseTrailingNumericId(authorUrl);

    Document page;
    // try-with-resources closes the response even if reading the body throws.
    try (CloseableHttpResponse response = commonUtils.sendGet(authorUrl, httpClient)) {
        page = Jsoup.parse(EntityUtils.toString(response.getEntity(), "UTF-8"));
    }

    // Header node holding the avatar and the "label: value" list items.
    Element headline = page.getElementById("headline");

    // Avatar: first <img> inside the first div.pic of the header, if any.
    String aImagePath = "";
    Element picBox = headline == null ? null : headline.select("div.pic").first();
    Element avatar = picBox == null ? null : picBox.getElementsByTag("img").first();
    if (avatar != null) {
        aImagePath = avatar.attr("src");
    } else {
        System.out.println("找不到头像" + authorUrl);
    }

    String name = page.getElementById("content").getElementsByTag("h1").get(0).text();

    String sex = "";
    String birth = "";
    String area = "";
    if (headline != null) {
        for (Element item : headline.getElementsByTag("li")) {
            String text = item.text();
            if (text.contains("性别")) {
                sex = authorFieldValue(text, sex);
            } else if (text.contains("出生日期") || text.contains("生卒日期")) {
                birth = authorFieldValue(text, birth);
            } else if (text.contains("出生地") || text.contains("国家/地区")) {
                area = authorFieldValue(text, area);
            }
        }
    }

    // Biography: prefer the full (initially hidden) text, fall back to the visible body.
    String aboutAuthor = "";
    Element intro = page.getElementById("intro");
    if (intro != null) {
        Element introBody = intro.select("span[class='all hidden']").first();
        if (introBody == null) {
            introBody = intro.select("div[class='bd']").first();
        }
        if (introBody != null) {
            // Strip all markup; prettyPrint(false) keeps jsoup from re-indenting the text.
            aboutAuthor = Jsoup.clean(introBody.html(), "", Whitelist.none(),
                    new Document.OutputSettings().prettyPrint(false));
        }
    }

    author authorEntity = new author();
    authorEntity.setAId(aId);
    authorEntity.setName(name);
    authorEntity.setSex(sex);
    authorEntity.setBirth(birth);
    authorEntity.setArea(area);
    authorEntity.setAImagePath(aImagePath);
    authorEntity.setAboutAuthor(aboutAuthor);
    return authorEntity;
}

/**
 * Returns the last path segment of {@code url} parsed as an int, ignoring one trailing slash.
 */
private static int parseTrailingNumericId(String url) {
    String trimmed = url.endsWith("/") ? url.substring(0, url.length() - 1) : url;
    return Integer.parseInt(trimmed.substring(trimmed.lastIndexOf('/') + 1));
}

/**
 * Returns everything after the first ':' in a "label:value" item, or {@code fallback}
 * when the item has no colon or nothing follows it.
 */
private static String authorFieldValue(String itemText, String fallback) {
    int colon = itemText.indexOf(':');
    if (colon < 0 || colon == itemText.length() - 1) {
        return fallback;
    }
    return itemText.substring(colon + 1);
}
public static book bookGet(String bookUrl, CloseableHttpClient httpClient) throws URISyntaxException, IOException, ScriptException {
CloseableHttpResponse bookResponse, authorResponse;
int bId;
String isbn = "";
String name = "";
String coverPath = "";
String authorName = "";
String publishingHouse = "";
String publishingYear = "";
String labels = "";
String briefIntroduction = "这本书还没有介绍!";
List<String> authorUrlList = new ArrayList<>();
List<String> recommonedUrlList = new ArrayList<>();
float score;
int pointNumber;
bId = Integer.parseInt(bookUrl.substring(bookUrl.lastIndexOf("/", bookUrl.length() - 2) + 1, bookUrl.length() - 1));
//bookUrl="https://book.douban.com/subject/1084336/";
bookResponse = commonUtils.sendGet(bookUrl, httpClient);
// bookResponse=commonUtils.sendGet("https://book.douban.com/subject/1084336/",httpClient);
Document docBookTemp = Jsoup.parse(EntityUtils.toString(bookResponse.getEntity(), "UTF-8"));
String debug = docBookTemp.toString();
// writeStringToDisk.writeHtml(debug, "book" + bId);
try {
score = Float.parseFloat(docBookTemp.select("#interest_sectl div.rating_self.clearfix strong").get(0).text().trim());
pointNumber = Integer.parseInt(docBookTemp.select("#interest_sectl > div > div.rating_self.clearfix > div > div.rating_sum > span > a > span").get(0).text().trim());
} catch (Exception e) {
score = 0;
pointNumber = 0;
// System.out.println("书籍信息不足:" + bookUrl);
// System.out.println(e);
// return null;
}
Element mainpic = docBookTemp.getElementById("mainpic");//书籍图片信息节点
Element info = docBookTemp.getElementById("info");//书籍信息节点
coverPath = mainpic.child(0).attr("href");//书籍封面url;
name = docBookTemp.select("#wrapper>h1").text();
//处理作者信息
//可能会出现多个作者,有的是作者的直接页面,有的不是直接的作何页面,而是作者的search,
Elements authorUrls = info.getElementsByTag("a");//id为info的节点下第一个a标签必为作者标签,不过当链接中有search时不是直接的作者页面,需再处理,只有链接中含有author时才是直接的作者页面
boolean flag = false;//记录是否搜索到作者链接
URI base = new URI(bookUrl);//处理链接中的相对路径
for (Element authorE : authorUrls) {
String authorUrl = authorE.attr("href");
if (authorUrl.contains("author"))//直接是作者页面
{
flag = true;
authorUrlList.add(base.resolve(authorUrl).toString());
} else if (false && authorUrl.contains("search"))//得到搜索页面
{
authorResponse = commonUtils.sendGet(base.resolve(authorUrl).toString(), null);
if (authorResponse.getStatusLine().getStatusCode() == 302) {
String redirectUrl = authorResponse.getHeaders("Location")[0].getValue().replace(" ", "");
authorResponse = commonUtils.sendGet(redirectUrl, null);
}
Document authorSearchTemp = Jsoup.parse(EntityUtils.toString(authorResponse.getEntity(), "UTF-8"));
//解密
Elements scripts = authorSearchTemp.getElementsByTag("script");
String windowDATA = "";
for (Element script : scripts) {
if (script.toString().contains("window.__DATA__")) {
windowDATA = script.toString().replaceAll("\\s+", "").replaceAll("<scripttype=\"text/javascript\">window.__DATA__=", "").replaceAll("window.__USER__=\\{\\}</script>", "");
}
}
String[] searchResultArray = {};
try {
List<Nam
没有合适的资源?快使用搜索试试~ 我知道了~
协同过滤服务+源代码+文档说明
![preview](https://csdnimg.cn/release/downloadcmsfe/public/img/white-bg.ca8570fa.png)
共198个文件
jar:64个
class:60个
xml:34个
![preview-icon](https://csdnimg.cn/release/downloadcmsfe/public/img/scale.ab9e0183.png)
1.该资源内容由用户上传,如若侵权请联系客服进行举报
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
版权申诉
0 下载量 189 浏览量
2024-06-06
17:33:50
上传
评论
收藏 40.17MB ZIP 举报
温馨提示
- 不懂运行,下载完可以私聊问,可远程教学 该资源内项目源码是个人的毕设,代码都测试ok,都是运行成功后才上传资源,答辩评审平均分达到96分,放心下载使用! <项目介绍> 1、该资源内项目代码都经过测试运行成功,功能ok的情况下才上传的,请放心下载使用! 2、本项目适合计算机相关专业(如计科、人工智能、通信工程、自动化、电子信息等)的在校学生、老师或者企业员工下载学习,也适合小白学习进阶,当然也可作为毕设项目、课程设计、作业、项目初期立项演示等。 3、如果基础还行,也可在此代码基础上进行修改,以实现其他功能,也可用于毕设、课设、作业等。 下载后请首先打开README.md文件(如有),仅供学习参考, 切勿用于商业用途。 --------
资源推荐
资源详情
资源评论
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![7z](https://img-home.csdnimg.cn/images/20210720083312.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
收起资源包目录
![package](https://csdnimg.cn/release/downloadcmsfe/public/img/package.f3fc750b.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/HTML.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/HTML.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/HTML.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/JAR.png)
共 198 条
- 1
- 2
资源评论
![avatar-default](https://csdnimg.cn/release/downloadcmsfe/public/img/lazyLogo2.1882d7f4.png)
![avatar](https://profile-avatar.csdnimg.cn/a3ff7c83b4464a7a89efb22831515060_abc6838.jpg!1)
机器学习的喵
- 粉丝: 1345
- 资源: 1493
上传资源 快速赚钱
我的内容管理 展开
我的资源 快来上传第一个资源
我的收益
登录查看自己的收益我的积分 登录查看自己的积分
我的C币 登录后查看C币余额
我的收藏
我的下载
下载帮助
![voice](https://csdnimg.cn/release/downloadcmsfe/public/img/voice.245cc511.png)
![center-task](https://csdnimg.cn/release/downloadcmsfe/public/img/center-task.c2eda91a.png)
安全验证
文档复制为VIP权益,开通VIP直接复制
![dialog-icon](https://csdnimg.cn/release/downloadcmsfe/public/img/green-success.6a4acb44.png)