package weli.v2;
import org.apache.commons.lang3.StringUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.Selectable;
import java.util.*;
/**
* Starting from a given overview (introduction) page,
* crawls the parameter page.
*/
public class ZolPageProcessor4_ implements PageProcessor {
/**
 * Extracts phone specifications from the page's {@code detailed-parameters} table and
 * stores the resulting {@code Phone} on the page, keyed by the page URL.
 *
 * <p>Every table row is tested against a fixed set of Chinese row labels. When several
 * rows contain the same label, the last such row is used. Each recognised row is copied
 * into the matching {@code Phone} property; a label with no row leaves its property untouched.
 *
 * @param page the page whose HTML is read and on which the result field is put
 */
@Override
public void process(Page page) {
    page.setCharset("utf-8");
    Html html = page.getHtml();

    // Row labels of interest: release date, e-commerce price, main screen resolution,
    // RAM capacity, ROM capacity, CPU model, battery capacity, main screen size,
    // phone weight, phone dimensions, operating system, factory system kernel.
    List<String> labels = Arrays.asList(
            "上市日期", "电商报价", "主屏分辨率", "RAM容量", "ROM容量", "CPU型号",
            "电池容量", "主屏尺寸", "手机重量", "手机尺寸", "操作系统", "出厂系统内核");

    // label -> last table row whose markup contains that label
    Map<String, Selectable> rows = new HashMap<>();
    for (Selectable row : html.xpath("//div[@class=\"detailed-parameters\"]/table/tbody/tr").nodes()) {
        for (String label : labels) {
            if (row.regex("[\\d\\D]*" + label + "[\\d\\D]*").match()) {
                rows.put(label, row);
            }
        }
    }

    String url = page.getUrl().toString();
    String brand = html.xpath("//div[@class=\"product-model page-title clearfix\"]/h1/text()").toString();
    Phone phone = new Phone().setBrand(brand).setUrl(url);

    Selectable row = rows.get("上市日期");
    if (row != null) {
        phone.setTimeToMarket(spanTextOrLinkText(row));
    }
    row = rows.get("电商报价");
    if (row != null) {
        // Store the price whichever lookup produced it, exactly like the other fields;
        // setting it only on the link fallback would drop a price found in the span itself.
        phone.setPrice(spanTextOrLinkText(row));
    }
    row = rows.get("主屏分辨率");
    if (row != null) {
        phone.setResolutionRatio(spanTextOrLinkText(row));
    }
    row = rows.get("RAM容量");
    if (row != null) {
        phone.setRam(spanTextOrLinkText(row));
    }
    row = rows.get("ROM容量");
    if (row != null) {
        phone.setRom(spanTextOrLinkText(row));
    }
    row = rows.get("CPU型号");
    if (row != null) {
        phone.setCpu(spanTextOrLinkText(row));
    }
    row = rows.get("主屏尺寸");
    if (row != null) {
        phone.setScreen(spanTextOrLinkText(row));
    }
    row = rows.get("手机重量");
    if (row != null) {
        phone.setWeight(spanTextOrLinkText(row));
    }
    row = rows.get("手机尺寸");
    if (row != null) {
        phone.setSize(spanTextOrLinkText(row));
    }
    row = rows.get("电池容量");
    if (row != null) {
        phone.setBattery(spanTextOrLinkText(row));
    }

    // The operating-system row is preferred; the factory-kernel row is only a substitute.
    // Unlike the fields above, only the direct span text is read here (no link fallback).
    Selectable osRow = rows.get("操作系统");
    if (osRow == null) {
        osRow = rows.get("出厂系统内核");
    }
    if (osRow != null) {
        phone.setOs(osRow.xpath("//td/span/text()").toString());
    }

    // NOTE(review): format() is defined on Phone outside this file; presumably it
    // normalises the collected strings before the result is published — confirm.
    phone.format();
    page.putField(url, phone);
}

/**
 * Reads the value of one parameter row: the text directly inside {@code td/span}, or,
 * when that text is empty, the text of a link nested inside the span.
 *
 * @param row a row of the parameter table; must not be {@code null}
 * @return the span text when it is non-empty, otherwise whatever the link lookup yields
 *         (which may itself be empty)
 */
private static String spanTextOrLinkText(Selectable row) {
    String text = row.xpath("//td/span/text()").toString();
    if (StringUtils.isEmpty(text)) {
        text = row.xpath("//td/span/a/text()").toString();
    }
    return text;
}
public Phone process(Html html, Phone phone) {
Map<String, Selectable> map = new HashMap<String, Selectable>() {
{
put("上市日期", null);
put("电商报价", null);
put("主屏分辨率", null);
put("RAM容量", null);
put("ROM容量", null);
put("CPU型号", null);
put("电池容量", null);
put("主屏尺寸", null);
put("手机重量", null);
put("手机尺寸", null);
put("操作系统", null);
put("出厂系统内核", null);
}
};
html.xpath("//div[@class=\"detailed-parameters\"]/table/tbody/tr")
.nodes()
.forEach((selectable) -> {
for (String s1 : map.keySet()) {
if (selectable.regex("[\\d\\D]*" + s1 + "[\\d\\D]*").match()) {
map.put(s1, selectable);
}
}
});
String brand = html.xpath("//div[@class=\"product-model page-title clearfix\"]/h1/text()").toString();
phone.setBrand(brand);
if (map.get("上市日期") != null) {
String timeToMarket = map.get("上市日期").xpath("//td/span/text()").toString();
if (StringUtils.isEmpty(timeToMarket)) {
timeToMarket = map.get("上市日期").xpath("//td/span/a/text()").toString();
}
phone.setTimeToMarket(timeToMarket);
}
// if (map.get("电商报价") != null) {
// String price = map.get("电商报价").xpath("//td/span/text()").toString();
// if (StringUtils.isEmpty(price)){
// price = map.get("电商报价").xpath("//td/span/a/text()").toString();
// phone.setPrice(price);
// }
// }
if (map.get("主屏分辨率") != null) {
String resolutionRatio = map.get("主屏分辨率").xpath("//td/span/text()").toString();
if (StringUtils.isEmpty(resolutionRatio)) {
resolutionRatio = map.get("主屏分辨率").xpath("//td/span/a/text()").toString();