package com.test;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Java jsoup example: crawling information from the 1688 website (detailed example).
 *
 * <p>Every method here performs live HTTP requests and prints what it extracts to
 * standard output; nothing is persisted.
 *
 * @author original work, http://www.javakcsj.com/
 * @since 2018-08-19
 */
public class DoGet {

    /** User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0; BIDUBrowser 2.x)";

    /** Timeout handed to jsoup for every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** Separator placed between values that are collected into a single string. */
    private static final String SEPARATOR = "@";

    /**
     * Runs the shop listing example ({@link #beginOne()}).
     *
     * @param args ignored
     * @throws IOException if the page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        DoGet scraper = new DoGet();
        scraper.beginOne();
        // Call scraper.beginTwo() instead to run the product detail page example.
    }

    /**
     * Fetches each hard-coded shop listing page and, for every {@code div.image}
     * element, prints the {@code title} and {@code href} of the links inside it.
     *
     * @throws IOException if a page cannot be fetched
     */
    public void beginOne() throws IOException {
        String[] listingUrls = {
            "https://weiqufood.1688.com/page/offerlist.htm?spm=a2615.2177701.0.0.3ce1bb3dwMLc39"
        };
        for (String listingUrl : listingUrls) {
            Document doc = fetch(listingUrl);
            for (Element imageBox : doc.select("div.image")) {
                Elements links = imageBox.select("a");
                System.out.println(links.attr("title"));
                System.out.println(links.attr("href"));
            }
        }
    }

    /**
     * Fetches one hard-coded product detail page and prints, in order: the prices,
     * the quantities matching those prices, the product specifications with their
     * available quantities, the detail attribute table, the carousel image URLs and
     * the URL from which the description is loaded asynchronously.
     *
     * @throws IOException if the page cannot be fetched
     */
    public void beginTwo() throws IOException {
        Document doc = fetch("https://detail.1688.com/offer/535597877009.html?spm=a2615.7691456.0.0.QyqIYA");

        // Prices found on the page (the original author expected three of them).
        String prices = printAndCollect(doc.select("tr.price").select("span.value"));
        // Quantities that correspond to those prices.
        String quantityRanges = printAndCollect(doc.select("tr.amount").select("span.value"));
        System.out.println("--" + prices);
        System.out.println("--" + quantityRanges);

        // Product variants and the quantity available for sale.
        StringBuilder specs = new StringBuilder();
        StringBuilder stock = new StringBuilder();
        int stockValues = 0;
        for (Element content : doc.select("div.d-content")) {
            // Packaging specification (per the original author: how many packs a sale starts at).
            Elements nameSpans = content.select("td.name").select("span");
            StringBuilder rowSpecs = new StringBuilder();
            for (int j = 0; j < nameSpans.size(); j++) {
                if (j > 0) {
                    rowSpecs.append(SEPARATOR);
                }
                rowSpecs.append(nameSpans.get(j).html());
            }
            // Containers that yield no specification text are skipped entirely,
            // including their count cells.
            if (rowSpecs.length() == 0) {
                continue;
            }
            // Separate values coming from different containers as well; previously
            // they were glued together without any separator.
            if (specs.length() > 0) {
                specs.append(SEPARATOR);
            }
            specs.append(rowSpecs);

            for (Element countCell : content.select("td.count")) {
                if (stockValues++ > 0) {
                    stock.append(SEPARATOR);
                }
                stock.append(countCell.select("em.value").html());
            }
        }
        System.out.println("规格:" + specs);
        System.out.println("数量:" + stock);

        // Detailed parameters: print the attribute table when the page has one.
        // first() returns null for an empty selection, so a page without this
        // container no longer aborts the rest of the method.
        Element attributes = doc.select("div.offerdetail_ditto_attributes").first();
        if (attributes != null) {
            System.out.println(attributes.select("table").html());
        }

        // Carousel images: swap the 60x60 thumbnail marker for the 400x400 one.
        for (Element image : doc.select("div.tab-content-container").select("img")) {
            System.out.println(image.attr("src").replace(".60x60", ".400x400"));
        }

        // URL of the asynchronously loaded description; moni(String) can fetch it.
        String lazyUrl = doc.select("div.desc-lazyload-container").attr("data-tfs-url");
        System.out.println(lazyUrl);
    }

    /**
     * Simulates the page's asynchronous call by fetching {@code lazyUrl} directly.
     * Unlike the other requests, the response content type is not checked.
     *
     * @param lazyUrl URL to fetch
     * @return the response parsed as a document
     * @throws IOException if the request fails
     */
    public Document moni(String lazyUrl) throws IOException {
        return Jsoup.connect(lazyUrl)
                .ignoreContentType(true)
                .data("query", "Java")
                .userAgent(USER_AGENT)
                .timeout(TIMEOUT_MILLIS)
                .get();
    }

    /**
     * Visits the pages of a paginated listing and hands each page URL to
     * {@link #parseUrl(String, String)}.
     *
     * <p>The page URL is the part of {@code url} before {@code "&offset="} (or the
     * whole {@code url} when that marker is absent) followed by
     * {@code "&page=<n>&click=0"}. Each URL is built from that fixed base, so the
     * parameters of earlier pages no longer pile up in later URLs.
     *
     * <p>NOTE(review): pages {@code 1} to {@code yeshu - 1} are visited, exactly as
     * before. If {@code yeshu} is the total number of pages, the last page is
     * skipped - confirm the intended bound.
     *
     * @param url     listing URL used as the template
     * @param yeshu   exclusive upper bound of the page numbers, as a decimal string
     * @param timesId passed through to {@code parseUrl}
     * @throws IOException           if a page cannot be fetched
     * @throws NumberFormatException if {@code yeshu} is not a valid integer
     */
    public static void done(String url, String yeshu, String timesId) throws IOException {
        int size = Integer.parseInt(yeshu);
        int offsetIndex = url.indexOf("&offset=");
        String base = offsetIndex >= 0 ? url.substring(0, offsetIndex) : url;
        for (int page = 1; page < size; page++) {
            parseUrl(base + "&page=" + page + "&click=0", timesId);
        }
    }

    /**
     * Fetches {@code aa} and prints one line per {@code div.gl-i-wrap} element: the
     * {@code title} of the link under {@code div.p-name}, the {@code data-price} of
     * the {@code strong} under {@code div.p-price} and the text of the link under
     * {@code div.p-commit}.
     *
     * @param aa      URL of the page to parse
     * @param timesId not used by this method; kept so existing callers still compile
     * @throws IOException if the page cannot be fetched
     */
    public static void parseUrl(String aa, String timesId) throws IOException {
        Document doc = fetch(aa);
        for (Element item : doc.select("div.gl-i-wrap")) {
            String title = item.select("div.p-name").select("a").attr("title");
            String price = item.select("div.p-price").select("strong").attr("data-price");
            String commit = item.select("div.p-commit").select("a").text();
            System.out.println(title + "*************" + price + "****" + commit);
        }
    }

    /**
     * Issues the GET request shared by the scraping methods.
     *
     * <p>NOTE(review): the {@code query=Java} data parameter is sent with every
     * request, as the original code did; it looks like a leftover from sample
     * code - confirm whether it is needed.
     *
     * @param url URL to fetch
     * @return the response parsed as a document
     * @throws IOException if the request fails
     */
    private static Document fetch(String url) throws IOException {
        return Jsoup.connect(url)
                .data("query", "Java")
                .userAgent(USER_AGENT)
                .timeout(TIMEOUT_MILLIS)
                .get();
    }

    /**
     * Prints the inner HTML of each element on its own line and returns all of
     * them in one string, each value preceded by {@link #SEPARATOR}.
     *
     * @param values elements to print and collect
     * @return the collected values, or an empty string when there are none
     */
    private static String printAndCollect(Elements values) {
        StringBuilder collected = new StringBuilder();
        for (Element value : values) {
            String html = value.html();
            System.out.println(html);
            collected.append(SEPARATOR).append(html);
        }
        return collected.toString();
    }
}
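/*
 * Download-page residue that followed the source listing (not part of the program):
 * Java course project: "java jsoup crawling 1688 website information, detailed example"
 * 4 stars · better than 85% of resources · points required: 50 · 98 views
 * Uploaded 2018-08-24 21:28:23
 * Comments: 6
 * Favorite · 389KB RAR · Report
 * Ordinary user
 * - Followers: 28
 * - Resources: 237
 */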