package com.study.utils;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.gargoylesoftware.htmlunit.*;
import com.gargoylesoftware.htmlunit.html.*;
import com.study.bo.AreaDataBO;
import com.study.common.utils.DateTimeUtil;
import com.study.common.utils.sortUtil;
import com.study.enums.AreaUrlEnum;
import com.study.impl.DomesticInformationServiceImpl;
import com.study.pojo.AreaData;
import com.study.pojo.CumulativeTrend;
import com.study.pojo.DomesticInformation;
import lombok.SneakyThrows;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.LogFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.logging.Level;
/**
* @Author: liyuankun
* @Date: 2021/1/18 16:43
* @Description: 爬虫工具类
**/
public class HtmlParseUtils {
private static final Logger logger = LoggerFactory.getLogger(HtmlParseUtils.class);
/**
 * Manual entry point for trying the scraper from the command line.
 *
 * <p>Runs {@link #jsonToMapBasicData()} once and logs the values it returns.
 *
 * <p>Sample per-area trend endpoint that can be passed to {@code jsonToMapAreaData(String)}:
 * {@code https://voice.baidu.com/newpneumonia/getv2?target=trend&isCaseIn=1&from=mola-virus&area=%E5%90%89%E6%9E%97&stage=publish&callback=jsonp_1613640462140_23156}
 *
 * @param args ignored
 * @throws Exception if scraping fails
 */
public static void main(String[] args) throws Exception {
    // The previous version declared an unused URL local and discarded the result;
    // log the scraped values so the run shows what was extracted.
    List<String> basicData = jsonToMapBasicData();
    logger.info("basic data: {}", basicData);
}
/**
 * Loads a page in a headless HtmlUnit browser (JavaScript enabled), waits for background
 * scripts, and returns the resulting markup parsed as a Jsoup {@link Document}.
 *
 * <p>Used in this class for https://voice.baidu.com/act/newpneumonia/newpneumonia/#tab1.
 *
 * <p>Any failure while loading the page is logged and then rethrown unchanged (checked
 * exceptions propagate through {@code @SneakyThrows}). Previously a load failure was only
 * logged and then surfaced as a {@code NullPointerException} that hid the real cause.
 *
 * @param url absolute URL of the page to load
 * @return the rendered page, parsed by Jsoup
 */
@SneakyThrows
public static Document parse(String url) {
    // Silence the verbose logging of HtmlUnit and its HTTP client.
    LogFactory.getFactory().setAttribute("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.NoOpLog");
    java.util.logging.Logger.getLogger("com.gargoylesoftware.htmlunit").setLevel(Level.OFF);
    java.util.logging.Logger.getLogger("org.apache.commons.httpclient").setLevel(Level.OFF);

    final String pageXml;
    // try-with-resources closes the simulated Chrome client on every path.
    try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
        // Do not throw on JavaScript errors.
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        // Do not throw on non-200 HTTP status codes.
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        webClient.getOptions().setActiveXNative(false);
        // The page is never displayed, so CSS is not needed.
        webClient.getOptions().setCssEnabled(false);
        // Essential: enable JavaScript.
        webClient.getOptions().setJavaScriptEnabled(true);
        // Essential: AJAX support.
        webClient.setAjaxController(new NicelyResynchronizingAjaxController());

        HtmlPage page = webClient.getPage(new URL(url));
        // Asynchronous JavaScript takes time; wait up to 5000 ms for it to finish.
        webClient.waitForBackgroundJavaScript(5000);
        // Serialize while the client is still open.
        pageXml = page.asXml();
    } catch (Exception e) {
        logger.error("<<<<<< parse error cause", e);
        throw e;
    }

    // Debug level instead of stdout: the full page markup is large.
    logger.debug("rendered page xml: {}", pageXml);
    return Jsoup.parse(pageXml);
}
/**
 * Per-area data: downloads the JSONP trend payload at {@code url} and extracts the most recent
 * figures for that area.
 *
 * <p>The JSON object is cut out of the JSONP wrapper with fixed offsets: it starts 17
 * characters after the first occurrence of {@code success} and ends 4 characters before the
 * end of the response. NOTE(review): this presumably matches a payload shaped like
 * {@code ..."msg":"success","data":[{...}]});} — confirm against a live response.
 *
 * @param url trend endpoint for a single area
 * @return the area name, latest cumulative/cured/death/new counts, the derived number of
 *     existing cases, and today's date formatted as {@code yyyy-MM-dd}
 * @throws IOException if the download fails or the response does not have the expected shape
 */
public static AreaData jsonToMapAreaData(String url) throws IOException {
    String jsonText;
    // Close the network stream and reader once the body has been read.
    try (BufferedReader rd = new BufferedReader(
            new InputStreamReader(new URL(url).openStream(), Charset.forName("UTF-8")))) {
        jsonText = readAll(rd);
    }

    int marker = jsonText.indexOf("success");
    int start = marker + 17;
    int last = jsonText.length() - 4;
    if (marker < 0 || start > last) {
        throw new IOException("Unexpected trend response from " + url);
    }

    JSONObject area = JSON.parseObject(jsonText.substring(start, last));
    JSONObject trend = area == null ? null : area.getJSONObject("trend");
    JSONArray seriesJson = trend == null ? null : trend.getJSONArray("list");
    if (seriesJson == null) {
        throw new IOException("Trend response from " + url + " has no trend.list");
    }
    List<AreaDataBO> series = seriesJson.toJavaList(AreaDataBO.class);

    AreaData areaData = new AreaData();
    // Area name
    areaData.setArea(area.getString("name"));

    // Series order follows the original code: 0 = cumulative confirmed, 1 = cumulative cured,
    // 2 = cumulative deaths, 3 = newly confirmed. Each series uses its own last element.
    String cumulativeNumber = latestSeriesValue(series, 0);
    String cureNumber = latestSeriesValue(series, 1);
    String deathNumber = latestSeriesValue(series, 2);
    String newNumber = latestSeriesValue(series, 3);
    areaData.setCumulativeNumber(cumulativeNumber);
    areaData.setCureNumber(cureNumber);
    areaData.setDeathNumber(deathNumber);
    areaData.setNewNumber(newNumber);

    // Existing confirmed = cumulative - (cured + deaths)
    int existingNumber = Integer.parseInt(cumulativeNumber)
            - (Integer.parseInt(cureNumber) + Integer.parseInt(deathNumber));
    areaData.setExistingNumber(String.valueOf(existingNumber));

    // Date
    areaData.setDate(DateTimeUtil.getNow("yyyy-MM-dd"));
    return areaData;
}

// Returns the last data point of the series at the given position, rendered as a string.
private static String latestSeriesValue(List<AreaDataBO> series, int index) throws IOException {
    List<?> data = series.get(index).getData();
    if (data == null || data.isEmpty()) {
        throw new IOException("Trend series " + index + " contains no data");
    }
    return String.valueOf(data.get(data.size() - 1));
}
/**
 * Basic data: scrapes the summary page and returns its values as a flat list of strings.
 *
 * <p>The page's CSS class names embed a 7-character version token (e.g. {@code 1-1-295}).
 * The token is read from characters 6-12 of the class name of the first child of the first
 * {@code mola-window} element, then used to build the class names of the elements to collect.
 *
 * <p>Result order:
 * <ol>
 *   <li>update time: text of the first {@code span} in each {@code Virus_<version>_32Y_aO}
 *       element, with its first 6 characters removed;</li>
 *   <li>summary figures: text of each {@code VirusSummarySix_<version>_2ZJJBJ} element;</li>
 *   <li>added figures: text of the first {@code span} in each
 *       {@code VirusSummarySix_<version>_nfO6Mw} element.</li>
 * </ol>
 *
 * @return the collected strings, in the order described above
 * @throws IllegalStateException if the page does not contain the expected container element
 */
public static List<String> jsonToMapBasicData(){
    String url = "https://voice.baidu.com/act/newpneumonia/newpneumonia/#tab1";
    Document document = parse(url);

    Elements windows = document.getElementsByClass("mola-window");
    if (windows.isEmpty() || windows.get(0).children().isEmpty()) {
        throw new IllegalStateException("Page has no 'mola-window' element with children: " + url);
    }
    // Read the class name directly. The earlier tagName("div") call was jsoup's setter,
    // which renamed the element instead of just reading it.
    String rootClass = windows.get(0).child(0).className();
    if (rootClass.length() < 13) {
        throw new IllegalStateException("Unexpected root class name '" + rootClass + "'");
    }
    String version = rootClass.substring(6, 13);

    List<String> list = new ArrayList<>();

    // Time
    for (Element date : document.getElementsByClass("Virus_" + version + "_32Y_aO")) {
        String span = date.getElementsByTag("span").eq(0).text();
        // Drops a fixed 6-character prefix; presumably a label before the timestamp (TODO confirm).
        list.add(span.substring(6));
    }

    // Basic data
    for (Element summary : document.getElementsByClass("VirusSummarySix_" + version + "_2ZJJBJ")) {
        list.add(summary.text());
    }

    // Added data
    for (Element added : document.getElementsByClass("VirusSummarySix_" + version + "_nfO6Mw")) {
        list.add(added.getElementsByTag("span").eq(0).text());
    }
    return list;
}
//区域数据
public static List<AreaData> jsonToMapAreaData(){
String url = "https://voice.baidu.com/act/newpneumonia/newpneumonia/#tab1";
Document document = parse(url);
//取到更新的class
String text1 = document.getElementsByClass("mola-window").get(0).child(0).tagName("div"
没有合适的资源?快使用搜索试试~ 我知道了~
java本科毕业设计基于Echarts全国疫情大屏展示与设计源码.zip
共248个文件
java:82个
json:78个
png:19个
1.该资源内容由用户上传,如若侵权请联系客服进行举报
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
版权申诉
0 下载量 30 浏览量
2023-03-21
14:15:05
上传
评论
收藏 8.8MB ZIP 举报
温馨提示
java本科毕业设计基于Echarts全国疫情大屏展示与设计源码.zip
资源推荐
资源详情
资源评论
收起资源包目录
java本科毕业设计基于Echarts全国疫情大屏展示与设计源码.zip (248个子文件)
.browserslistrc 33B
mvnw.cmd 7KB
app.0ddc270f.css 11KB
index.css 6KB
index.css 6KB
golbal.css 390B
.gitignore 412B
.gitignore 364B
index.html 832B
index.html 669B
favicon.ico 4KB
favicon.ico 4KB
maven-wrapper.jar 50KB
HtmlParseUtils.java 13KB
BasicDataAddRequest.java 11KB
BasicDataAddBO.java 11KB
BasicDataAdd.java 11KB
BasicDataRequest.java 11KB
BasicDataBO.java 11KB
BasicData.java 11KB
DateTimeUtil.java 10KB
AreaDataBO.java 8KB
AreaDataRequest.java 8KB
AreaData.java 8KB
AreaUrlEnum.java 6KB
BasicDataAddServiceImpl.java 6KB
CumulativeTrendServiceImpl.java 6KB
NewTrendRequest.java 6KB
NewTrendBO.java 6KB
NewTrend.java 6KB
BasicDataServiceImpl.java 6KB
NewTrendServiceImpl.java 6KB
BeanConvertUtil.java 5KB
CumulativeTrendRequest.java 5KB
CumulativeTrendBO.java 5KB
CumulativeTrend.java 5KB
DomesticInformationRequest.java 5KB
DomesticInformationBO.java 5KB
DomesticInformation.java 5KB
MavenWrapperDownloader.java 5KB
DomesticInformationServiceImpl.java 5KB
AreaDataServiceImpl.java 5KB
QueryBase.java 4KB
QueryPage.java 4KB
DemoApplicationTests.java 3KB
DomesticInformationMapper.java 2KB
BaseQueryService.java 2KB
CumulativeTrendMapper.java 2KB
BasicDataAddMapper.java 2KB
AreaDataController.java 2KB
BasicDataMapper.java 2KB
NewTrendMapper.java 2KB
AreaDataMapper.java 2KB
IPUtils.java 2KB
ChineseAndEnglish.java 2KB
DomesticInformationController.java 2KB
WebSocket.java 2KB
CumulativeTrendController.java 2KB
InitValueConstant.java 2KB
AreaDataJob.java 1KB
CommonErrorCodeEnum.java 1KB
NewTrendController.java 1KB
BasicDataController.java 1KB
BasicDataAddController.java 1KB
DictRequest.java 1KB
PageQueryService.java 1KB
DictService.java 1KB
DomesticInformationJob.java 1011B
CumulativeTrendJob.java 1007B
NewTrendJob.java 974B
BasicDataAddJob.java 965B
BasicDataJob.java 925B
IBaseDao.java 822B
WebSocketEnum.java 762B
DomesticInformationService.java 653B
WebMvcConfig.java 647B
BasicDataService.java 609B
DomesticInformationResponse.java 598B
sortUtil.java 592B
BaseResponse.java 571B
CumulativeTrendResponse.java 541B
CumulativeTrendService.java 539B
BasicDataAddService.java 535B
BasicDataAddResponse.java 523B
BasicDataResponse.java 505B
AreaDataResponse.java 499B
NewTrendResponse.java 499B
AreaDataService.java 475B
NewTrendService.java 474B
IBaseDomain.java 469B
WebSocketConfig.java 468B
DemoApplication.java 410B
FileBO.java 402B
DictResponse.java 398B
QueryGetParameters.java 208B
bg.a6333996.jpg 252KB
bg.jpg 252KB
bg.jpg 252KB
echarts.min.js 734KB
echarts.min.js 734KB
共 248 条
- 1
- 2
- 3
资源评论
oligaga
- 粉丝: 50
- 资源: 2万+
下载权益
C知道特权
VIP文章
课程特权
开通VIP
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功