package com.jsoup;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.DBHelper;
import com.DomainConnectionHelper;
public class DataGet
{
// Parameterized INSERT for one row of the "school" table; values are bound positionally
// in the column order listed in the statement.
public static String INSERTSCHOOLDATA = "insert into school(name,district,nature,IS211,IS985,NationalKeyDisciplines,Academician,DoctorPoints,MasterDegree,IndependentCollege,buzhishu,GraduateSchool,SchoolLevel,SchoolType,RecruitingPhone,Email,webaddress,address,postcode,SchoolBadge,simpAbstract) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
// Parameterized INSERT for one row of the "school_picture" table (Name, Source, Path).
public static String INSERTPICTURE="insert into school_picture(Name,Source,Path) values(?,?,?)";
// Parameterized INSERT for one row of the "school_count" table (SchoolId, Count).
public static String INSERTSCHOOL_COUNT="insert into school_count(SchoolId,Count) values(?,?)";
/**
 * Fetches {@code urlStr} with an HTTP GET and returns the response body as text.
 *
 * <p>The body is read and decoded by {@link #inToStringByByte(InputStream)}. Every failure
 * (malformed URL, I/O error, or a status code other than 200) is reported as a single generic
 * {@code Exception}; the underlying error is attached as its cause so the root problem is
 * not lost.
 *
 * @param urlStr absolute URL to request
 * @return the response body
 * @throws Exception if the request cannot be completed or the server does not answer 200
 */
public static String doGet(String urlStr) throws Exception {
    HttpURLConnection connection = null;
    InputStream in = null;
    try {
        URL url = new URL(urlStr);
        connection = (HttpURLConnection) url.openConnection();
        connection.setRequestMethod("GET");
        connection.setConnectTimeout(5000);
        connection.setDoInput(true);
        // No setDoOutput(true): this request never writes a body.
        if (connection.getResponseCode() != 200) {
            throw new Exception("服务器返回值不为200");
        }
        in = connection.getInputStream();
        return inToStringByByte(in);
    } catch (Exception e) {
        // Keep the original failure as the cause instead of only printing it.
        throw new Exception("get请求失败", e);
    } finally {
        // Release the stream and the connection on both success and failure.
        if (in != null) {
            try {
                in.close();
            } catch (Exception ignored) {
                // Nothing useful can be done if closing fails; the result or the
                // primary exception is already on its way to the caller.
            }
        }
        if (connection != null) {
            connection.disconnect();
        }
    }
}
/**
 * Reads {@code in} until end of stream and decodes everything that was read as UTF-8.
 *
 * <p>All bytes are collected first and decoded in one step. Decoding each buffer-sized chunk
 * separately would corrupt any multi-byte character whose bytes are split across two reads.
 * The stream is not closed here; that stays the caller's responsibility.
 *
 * @param in stream to drain
 * @return the decoded text, or an empty string if the stream has no data
 * @throws Exception if reading from the stream fails
 */
public static String inToStringByByte(InputStream in) throws Exception {
    ByteArrayOutputStream outStr = new ByteArrayOutputStream();
    byte[] buffer = new byte[1024];
    int len;
    while ((len = in.read(buffer)) != -1) {
        outStr.write(buffer, 0, len);
    }
    return outStr.toString("UTF-8");
}
/**
 * Replaces every character that needs four bytes in UTF-8 (for example emoji) with
 * {@code '*'}.
 *
 * <p>Such characters are exactly the supplementary code points (U+10000 and above), which a
 * Java string stores as a surrogate pair. The text is scanned by code point, so the result
 * does not depend on the platform default charset, and digits already present in the text
 * (such as "0000") are left untouched.
 *
 * <p>Originally written 2015-08-11.
 *
 * @author ChenGuiYong
 * @param content text to clean; must not be {@code null}
 * @return {@code content} with each four-byte character replaced by a single {@code '*'}
 */
public static String removeFourChar(String content) {
    StringBuilder cleaned = new StringBuilder(content.length());
    int index = 0;
    while (index < content.length()) {
        int codePoint = content.codePointAt(index);
        int width = Character.charCount(codePoint);
        if (width == 2) {
            // Supplementary code point: this is what encodes to four UTF-8 bytes.
            cleaned.append('*');
        } else if (codePoint >= Character.MIN_SURROGATE && codePoint <= Character.MAX_SURROGATE) {
            // A lone surrogate cannot be encoded in UTF-8; the earlier byte round-trip
            // produced '?' for it, so keep doing that.
            cleaned.append('?');
        } else {
            cleaned.append((char) codePoint);
        }
        index += width;
    }
    return cleaned.toString();
}
public static School getUrlSchoolData(int count){
String htmlStr;
School school=new School();
try {
// 将获取的网页 HTML 源代码转化为 Document
//Document doc = Jsoup.parse(htmlStr);
Document doc = Jsoup.parse(new URL("http://www.ruyile.com/school.aspx?id="+count).openStream(), "UTF-8", "http://www.ruyile.com/school.aspx?id="+count);
Element nameElement = doc.getElementsByClass("header").first();
String name = nameElement.getElementsByTag("strong").first().text();
school.setName(name);
Elements results = doc.getElementsByClass("stq");
Element result = results.get(0);
//获取result下class为xxsx的元素
Elements xxsxResults = result.getElementsByClass("xxsx");
Elements xxsxResult =xxsxResults.get(0).children();
Elements jjResultsElement = result.getElementsByClass("jj");
if(jjResultsElement.size()>0){
String jjResults = jjResultsElement.first().html();
school.setSimpAbstract(removeFourChar(jjResults.toString()));
}
//获取校徽 main
Elements imageElement = doc.getElementsByClass("xiaohui");
if(imageElement.size()>0){
Elements images = imageElement.first().getElementsByTag("img");
if(images.size()>0){
String schoolsrc = images.first().attr("src");
school.setSchoolsrc(schoolsrc);
String schoolAlt =images.first().attr("alt");
school.setSchoolAlt(schoolAlt);
}
}
for(Element element : xxsxResult){
//Elements divLinks = xxsxResult.select("div");
Element strong = element.getElementsByTag("strong").first();
String strongText =strong.text();
if("所属地区".equals(strongText)){
Elements links = element.getElementsByTag("a");
StringBuffer linkText=new StringBuffer();
for (Element link : links)
{
String text = link.text();
linkText.append(text).append(" ");
}
if(linkText.length()>0){
linkText.deleteCharAt(linkText.length()-1);
}
String district =linkText.toString();
school.setDistrict(district);
}
if("学校性质".equals(strongText)){
String nature = element.text().replace(strongText, "").replace(":", "");
school.setNature(nature);
}
if("专业类型".equals(strongText)){
String nature = element.text().replace(strongText, "").replace(":", "");
school.setNature(nature);
}
if("隶属于".equals(strongText)){
String nature = element.text().replace(strongText, "").replace(":", "");
school.setNature(nature);
}
if("211工程".equals(strongText)){
String iS211 = element.text().replace(strongText, "").replace(":", "");
school.setiS211(iS211);
}
if("985工程".equals(strongText)){
String iS985 = element.text().replace(strongText, "").replace(":", "");
school.setiS985(iS985);;
}
if("国家重点学科".equals(strongText)){
String nationalKeyDisciplines = element.text().replace(strongText, "").replace(":", "");
school.setNationalKeyDisciplines(nationalKeyDisciplines);
}
if("院士".equals(strongText)){
String academician = element.text().replace(strongText, "").replace(":", "");
school.setAcademician(academician);
}
if("博士点".equals(strongText)){
String doctorPoints = element.text().replace(strongText, "").replace(":", "");
school.setDoctorPoints(doctorPoints);
}
if("硕士点".equals(strongText)){
String masterDegree = element.text().replace(strongText, "").replace(":", "");
school.setMasterDegree(masterDegree);
}
if("独立学院".equals(strongText)){
String independentCollege = element.text().replace(strongText, "").replace(":", "");
school.setIndependentCollege(independentCollege);
}
if("教育部直属".equals(strongText)){
String buzhishu = element.text().replace(strongText, "").replace(":", "");
school.setBuzhishu(buzhishu);
}
if("开设研究生院".equals(strongText)){
String graduateSchool = element.text().replace(strongText, "").replace(":", "");
school.setGraduateSchool(graduateSchool);
}
if("学校级别".equals(strongText)){
String schoolLevel = element.text().replace(strongText, "").replace(":", "");
school.setSchoolLevel(s
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
jsoup项目实例.zip (18个子文件)
DomainConnectionHelper.java 4KB
Rule.java 2KB
jsoup-1.10.1.jar 337KB
commons-dbutils-1.5.jar 60KB
Copy.java 299B
jsoup
Rule.java 2KB
Picture.java 145B
Test.java 1KB
LinkTypeData.java 1KB
ImageUtil.java 3KB
ExtractService.java 4KB
DataTest.java 2KB
School.java 4KB
RuleException.java 666B
DataGet.java 11KB
LogHelper.java 3KB
ConnectionInfo.java 548B
DBHelper.java 7KB
共 18 条
- 1
资源评论
挺不下的脚步
- 粉丝: 2
- 资源: 8
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功