import org.apache.commons.lang3.StringUtils;
import net.sf.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Created with Chenquan.
* Description: Scrapes Taobao/Tmall product detail pages (title, main images, SKU price and stock).
* Date: 2018-12-13
* Time: 15:12
*/
public class test {
/**
 * Entry point: extracts the item id from a product-detail URL and passes it to
 * {@code getAll}.
 *
 * @param args optional; when {@code args[0]} is a non-blank string it is used as the
 *             product-detail URL (it must carry an {@code id} query parameter).
 *             Otherwise the built-in sample URL is used.
 */
public static void main(String[] args) {
    // Sample product-detail page, used when no URL is supplied on the command line.
    String detailUrl = "https://detail.tmall.com/item.htm?spm=a1z10.10550-b.193.11.43c44d028KCTcr&id=580863947060";
    if (args != null && args.length > 0 && args[0] != null && !args[0].trim().isEmpty()) {
        detailUrl = args[0].trim();
    }

    String itemId = getParamByUrl(detailUrl, "id");
    // Without an item id getAll would request a meaningless URL, so stop early.
    // NOTE(review): assumes getParamByUrl yields null/empty when the parameter is missing - confirm.
    if (itemId == null || itemId.trim().isEmpty()) {
        System.err.println("No \"id\" parameter found in URL: " + detailUrl);
        return;
    }
    getAll(itemId);
}
public static void getAll(String item_id ) {
try {
Thread.sleep(2000);//一个休息5s,太快会被禁
} catch (InterruptedException e) {
e.printStackTrace();
}
System.out.println("开始时间:" + new Date());
Date dateStart = new Date();
Document doc = null;
String id = "";
try {
String url = "https://item.taobao.com/item.htm?id="+item_id;
id = getParamByUrl(url, "id");
doc = Jsoup.connect(url).ignoreContentType(true).get();
} catch (IOException e) {
e.printStackTrace();
}
if (doc.baseUri().contains("tmall")) {
System.out.println("商品名称:"+ doc.select("h1[data-spm=\"1000983\"]").text());
}else {
System.out.println("商品名称:" + doc.select("h3[class=\"tb-main-title\"]").text());
}
Elements imgSrcElement = doc.select("#J_UlThumb > li");
for (Element element : imgSrcElement) {
String imgSrc = "";
if (element.baseUri().contains("tmall")){
imgSrc = element.getElementsByTag("img").attr("src");
}else{
imgSrc = element.getElementsByTag("img").attr("data-src");
}
// imgSrc = imgSrc.replaceFirst("//img.alicdn.com/imgextra/", "");
//imgSrc = imgSrc.substring(0, imgSrc.length() - 10);
imgSrc = imgSrc.replaceAll("_60x60q90.jpg",""); //处理掉不必要的数据
//String substring = imgSrc.substring(imgSrc.indexOf("_60") + 3, imgSrc.lastIndexOf(".jpg"));
//String substring = imgSrc.substring(0, imgSrc.lastIndexOf(".jpg"));
//String substring = imgSrc.substring(0, imgSrc.lastIndexOf("jpg_"));
System.out.println("主图url:" + imgSrc);
}
// 规格参数
Elements selectRules = doc.select(".J_TSaleProp");
List<List<String>> liHashMap = new ArrayList<>();
for (Element ulElement : selectRules) {
String ul = ulElement.getElementsByTag("ul").attr("data-property");
System.out.println("ul:" + ul);
List<String> liString = new ArrayList<>();
for (Element liElement : ulElement.getElementsByTag("li")) {
String liDataValue = liElement.getElementsByTag("li").attr("data-value");
//System.out.println("liDataValue: " + liDataValue);
//liString.add(liDataValue);
String aStyle = liElement.getElementsByTag("a").attr("style");
if (StringUtils.isNotBlank(aStyle)) {
aStyle = aStyle.replaceAll("background:url\\(", "");
//aStyle = aStyle.substring(0, aStyle.length() - 29);
aStyle = aStyle.replaceAll("_40x40q90.jpg\\) center no-repeat;", "");
System.out.println("aStyle: " + aStyle);
}
String spanText = liElement.getElementsByTag("span").text();
if (StringUtils.isNotBlank(spanText)) {
System.out.println("spanText: " + spanText);
}
}
liHashMap.add(liString);
}
List<String> combination = test1.combination(liHashMap);
//获取价格、库存
Elements eles = doc.getElementsByTag("script");
for (Element ele : eles) {
String s = ele.toString();
if (!ele.baseUri().contains("tmall")) {//淘宝
String rgex = "";
String subUtilSimple = "";
if (s.contains("skuMap")) {
//获取sku的id
rgex = "skuMap(.*?)propertyMemoMap";
String skuId = s.replaceAll("\\s*", "");
// System.out.println(s);
subUtilSimple = getSubUtilSimple(skuId, rgex);
subUtilSimple = subUtilSimple.substring(1, subUtilSimple.length() - 1);
//
JSONObject jb = JSONObject.fromObject(subUtilSimple);
JSONObject finalJb = jb;
List<String> skuList = new ArrayList<>();
combination.forEach(p->{
JSONObject jsonObject = finalJb.getJSONObject(";" + p + ";");
if (!jsonObject.isNullObject()) {
String o = jsonObject.getString("skuId");
System.out.println("sku的id: " + o);
skuList.add(o);
}
});
String url = "https://h5api.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?data=";//手机的html 5 页面 ,为了获取库存、价格
String enc = "{\"itemNumId\":\"" + id + "\"}";
String substore = "";
String store = "";
try {
String gbk = URLEncoder.encode(enc, "utf-8");
String sds = url + gbk;
System.out.println("库存、价格" + sds);
doc = Jsoup.connect(sds).ignoreContentType(true).get();
store = doc.toString();
rgex = "sku2info(.*?)skuItem";
substore = getSubUtilSimple(store, rgex);
substore = substore.substring(3, substore.length() - 3);
String sub = substore.replaceAll("\\\\", "").replaceAll("\\s*", "");
JSONObject sb = JSONObject.fromObject(sub);
skuList.stream().forEach(p->{
if (sb.has(p)) {//判断是否有值,没值不取,不然会报错
String string = sb.getString(p);
System.out.println("淘宝的价格库存==============" + string);
}
});
} catch (Exception e) {
System.out.println("报错的地方store:" + store);
// System.out.println("报错的地方substore:" + substore);
e.printStackTrace();
System.out.println("=====================================程序报错,提前结束===================================================" );
return;
}
}
if (s.contains("descUrl") && s.contains("counterApi")) {
// System.out.println(s);
//详情链接
rgex = "protocol(.*?)desc\\.alicdn\\.com";
subUtilSimple = getSubUtilSimple(s, rgex);
subUtilSimple = subUtilSimple.substring(14, subUtilSimple.length() - 7);
System.out.println("详情链接: " + subUtilSimple);
try {
doc = Jsoup.connect("http:" + sub