package com.rabbitmq.worksn;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
 * Crawls Suning list pages for product URLs and scrapes individual product pages
 * (title, price, selling point, image, shop name and review counts) into {@code Item} objects.
 *
 * <p>All state is kept in public static fields and the class is not thread-safe.
 * {@code Send} and {@code Recv} are defined elsewhere in the project; presumably they publish
 * and consume the collected URLs through a message queue — confirm against those classes.
 */
public class TestJsoup {

    /** Browser-like User-Agent sent with the list, price and review requests. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";

    /** Prefix shared by every product detail URL. */
    private static final String PRODUCT_URL_PREFIX = "http://product.suning.com/";

    /** Upper bound on the number of list pages requested per category. */
    private static final int MAX_PAGES_PER_CATEGORY = 100;

    /** Pause, in milliseconds, before the review request is retried after an I/O failure. */
    private static final long REVIEW_RETRY_DELAY_MS = 20000L;

    /**
     * "Empty page" flag: set to 1 by {@link #getTtemUrlListByPage(String)} when a list page
     * yields no price elements, 0 otherwise. {@link #sendURLs(String)} uses it to stop paging.
     */
    public static int temp = 0;

    /** Product URLs collected from list pages. */
    public static List<String> itemUrlList = new ArrayList<>();

    /** List-page URLs that could not be fetched or parsed, kept so they can be retried separately. */
    public static List<String> itemUrlListbad = new ArrayList<>();

    private static final ObjectMapper MAPPER = new ObjectMapper();

    /** Not referenced inside this class; kept because it is part of the public surface. */
    public static int a = 0;

    /**
     * Crawls every configured category and then starts the receiver.
     *
     * @param args unused
     * @throws Exception if sending the URLs or starting the receiver fails
     */
    public static void main(String[] args) throws Exception {
        // Category ids to crawl. Ids that were previously toggled on and off here:
        // 20103, 20084, 20016, 20090, 20062, 410503, 294003, 20099, 20529, 157123,
        // 337020, 500678, 157159, 157133, 258003, 20002.
        List<String> categoryIds = new ArrayList<>();
        categoryIds.add("157237");

        for (String categoryId : categoryIds) {
            sendURLs("http://list.suning.com/0-" + categoryId + "-");
        }
        Recv.main1();
    }

    /**
     * Walks the list pages {@code url + 0 + ".html"} … {@code url + 99 + ".html"}, collecting
     * product URLs until a page without products is seen, then calls {@code Send.send()}.
     *
     * <p>Category index reference: http://as.suning.com/allsort.htm
     *
     * @param url list-page URL prefix, ending just before the page number
     * @throws Exception if {@code Send.send()} fails
     */
    public static void sendURLs(String url) throws Exception {
        // Start from a clean flag: a value of 1 left behind by an earlier crawl (for example when
        // its last page was empty) would otherwise abort this crawl before the first fetch.
        temp = 0;
        for (int i = 0; i < MAX_PAGES_PER_CATEGORY; i++) {
            String pageurl = url + i + ".html";
            System.out.println(i);
            // The flag reflects the page fetched in the previous iteration.
            if (temp == 1) {
                temp = 0;
                break;
            }
            getTtemUrlListByPage(pageurl);
        }
        Send.send();
    }

    /**
     * Extracts the product URLs from one list page and appends them to {@link #itemUrlList}.
     *
     * <p>Sets {@link #temp} to 1 when the page contains no price elements and to 0 otherwise.
     * Any failure is printed and the page URL is recorded in {@link #itemUrlListbad}; nothing is
     * rethrown.
     *
     * @param pageUrl URL of the list page to fetch
     * @throws IOException declared for compatibility; failures are handled internally
     */
    public static void getTtemUrlListByPage(String pageUrl) throws IOException {
        try {
            // Chained selects narrow the match step by step: <em> elements, then those carrying
            // the "prive" class, then those carrying the "price" class.
            Elements priceTags = Jsoup.connect(pageUrl)
                    .userAgent(USER_AGENT)
                    .timeout(5000)
                    .get()
                    .select("em").select(".prive").select(".price");
            System.out.println(priceTags.size());
            temp = priceTags.isEmpty() ? 1 : 0;

            for (Element priceTag : priceTags) {
                // The code reads datasku as "<productId>|||||<storeId>".
                String[] parts = priceTag.attr("datasku").replace("|||||", " ").split(" ");
                if (parts.length < 2) {
                    // Malformed or missing attribute: skip this element.
                    continue;
                }
                String productUrl = PRODUCT_URL_PREFIX + parts[1] + "/" + parts[0] + ".html";
                itemUrlList.add(productUrl);
                System.out.println(productUrl);
            }
        } catch (Exception e) {
            // Remember the page that failed so it can be re-run on its own later.
            e.printStackTrace();
            itemUrlListbad.add(pageUrl);
        }
    }

    /** Manual check that a product URL splits into store id and product id. */
    @Test
    public void ma() {
        String url = "http://product.suning.com/0000000000/619981933.html";
        String[] ids = splitProductUrl(url);
        String storeID = ids[0];
        String productID = ids[1];
        System.out.println(storeID + productID);
    }

    /**
     * Builds the URL of the price endpoint for a product.
     *
     * @param itemID product id; it is appended after a fixed {@code 000000000} prefix
     * @param carID  id placed in the fourth URL segment ({@link #getItem(String)} passes the store id)
     * @return the price endpoint URL, requesting the {@code pcData} JSONP callback
     */
    public static String getpriceurl(String itemID, String carID) {
        // NOTE(review): the trailing numeric segments and the "_" timestamp are hard-coded;
        // their meaning is not visible in this file.
        return "http://pas.suning.com/nspcsale_0"
                + "_000000000" + itemID
                + "_000000000" + itemID
                + "_" + carID
                + "_10_010_0100101_20358_1000000_9017_10106_Z001_.html?callback=pcData&_=1479706452429";
    }

    /**
     * Fetches the price endpoint and returns the value of
     * {@code data.price.saleInfo[0].netPrice} as text.
     *
     * @param priceURL URL produced by {@link #getpriceurl(String, String)}
     * @return the net price exactly as the endpoint reports it
     * @throws IOException if the request fails or the payload lacks the expected field
     * @throws IllegalStateException if the response body is not a JSONP call
     * @throws Exception declared for compatibility with existing callers
     */
    public static String getPrice(String priceURL) throws Exception {
        Document doc = Jsoup.connect(priceURL).userAgent(USER_AGENT).timeout(3000).get();
        JsonNode root = MAPPER.readTree(unwrapJsonp(doc.text()));
        if (root == null) {
            throw new IOException("Empty price payload from " + priceURL);
        }
        // path() yields a "missing" node instead of null, so an unexpected payload is reported
        // with a clear message rather than a NullPointerException.
        JsonNode netPrice = root.path("data").path("price").path("saleInfo").path(0).path("netPrice");
        if (netPrice.isMissingNode()) {
            throw new IOException("No data.price.saleInfo[0].netPrice in response from " + priceURL);
        }
        return netPrice.asText();
    }

    /**
     * Scrapes one product page, plus its price and review endpoints, into an {@code Item}.
     *
     * @param url product URL of the form {@code http://product.suning.com/<storeId>/<productId>.html}
     * @return the populated item
     * @throws IllegalArgumentException if {@code url} does not contain both ids
     * @throws Exception if the page or the price endpoint cannot be fetched or parsed
     */
    public static Item getItem(String url) throws Exception {
        Document doc = Jsoup.connect(url).get();
        Item item = new Item();

        // e.g. http://product.suning.com/0000000000/619981933.html
        String[] ids = splitProductUrl(url);
        String storeID = ids[0];
        String productID = ids[1];

        // Title
        item.setTitle(doc.select("#itemDisplayName:not(span)").text());

        // Price comes from a separate ajax endpoint; keep only the integer part.
        String price = getPrice(getpriceurl(productID, storeID));
        int dot = price.lastIndexOf('.');
        if (dot >= 0) {
            price = price.substring(0, dot);
        }
        // NOTE(review): `price + 0` is string concatenation, so it appends the digit "0"
        // ("123" -> 1230, "" -> 0). Kept as-is; confirm the unit Item.price is expected to hold.
        item.setPrice(Long.valueOf(price + 0));

        // Selling point
        item.setSellPoint(doc.select("#promotionDesc").text());

        // Image
        item.setImage(doc.select("#bigImg Img").attr("src"));

        // Place of origin is not scraped.
        item.setItemSource("");

        // Shop name
        item.setStore(doc.select("#curShopName").text());

        // Review counts: [1] good, [2] general, [3] bad.
        String[] comment = getitemsource(productID, storeID);
        item.setComment_good(comment[1]);
        item.setComment_general(comment[2]);
        item.setComment_bad(comment[3]);
        return item;
    }

    /**
     * Fetches the review statistics of a product.
     *
     * <p>I/O failures are retried indefinitely with a {@value #REVIEW_RETRY_DELAY_MS} ms pause.
     * If the thread is interrupted while waiting, the interrupt flag is restored and the
     * array of empty strings is returned.
     *
     * @param itemID product id
     * @param carID  id placed after the product id in the URL ({@link #getItem(String)} passes the store id)
     * @return four strings: satisfaction score (0–10, one decimal), number of good reviews
     *         (4–5 stars), number of general reviews (2–3 stars), number of bad reviews (1 star)
     */
    private static String[] getitemsource(String itemID, String carID) {
        String[] itemsource = new String[]{"", "", "", ""};
        String url = "http://review.suning.com/ajax/review_satisfy/general-000000000" + itemID
                + "-" + carID
                + "-----satisfy.htm?callback=satisfy";

        while (itemsource[0].equals("")) {
            try {
                Document doc = Jsoup.connect(url).userAgent(USER_AGENT).timeout(3000).get();
                JsonNode counts = MAPPER.readTree(unwrapJsonp(doc.text())).get("reviewCounts").get(0);

                int oneStar = Integer.parseInt(counts.get("oneStarCount").asText());
                int twoStar = Integer.parseInt(counts.get("twoStarCount").asText());
                int threeStar = Integer.parseInt(counts.get("threeStarCount").asText());
                int fourStar = Integer.parseInt(counts.get("fourStarCount").asText());
                int fiveStar = Integer.parseInt(counts.get("fiveStarCount").asText());
                int total = Integer.parseInt(counts.get("totalCount").asText());

                int good = fourStar + fiveStar;
                int general = twoStar + threeStar;
                int bad = oneStar;

                itemsource[1] = String.valueOf(good);
                itemsource[2] = String.valueOf(general);
                itemsource[3] = String.valueOf(bad);
                // Assigned last: a non-empty score is what ends the retry loop.
                itemsource[0] = formatScore(good, total);
            } catch (IOException e) {
                System.out.println("访问出错getitemsource");
                try {
                    Thread.sleep(REVIEW_RETRY_DELAY_MS);
                } catch (InterruptedException interrupted) {
                    // Honour the interruption instead of silently resuming the retry loop.
                    Thread.currentThread().interrupt();
                    return itemsource;
                }
            }
        }
        return itemsource;
    }

    /** Returns the text between the first '(' and the last ')' of a JSONP response. */
    private static String unwrapJsonp(String jsonp) {
        int open = jsonp.indexOf('(');
        int close = jsonp.lastIndexOf(')');
        if (open < 0 || close <= open) {
            throw new IllegalStateException("Response is not a JSONP call: "
                    + jsonp.substring(0, Math.min(jsonp.length(), 80)));
        }
        return jsonp.substring(open + 1, close);
    }

    /** Splits a product URL into {storeId, productId}. */
    private static String[] splitProductUrl(String url) {
        String[] ids = url.replace(PRODUCT_URL_PREFIX, "").replace(".html", "").split("/");
        if (ids.length < 2) {
            throw new IllegalArgumentException("Not a product URL: " + url);
        }
        return ids;
    }

    /**
     * Formats {@code good * 10 / total} truncated to one decimal place, using integer arithmetic
     * so that tiny ratios and an empty review set do not produce exponent or NaN text.
     */
    private static String formatScore(int good, int total) {
        if (total <= 0) {
            return "0.0";
        }
        long tenths = (long) good * 100 / total;
        return (tenths / 10) + "." + (tenths % 10);
    }
}
没有合适的资源?快使用搜索试试~ 我知道了~
比价网站京东苏宁数据爬取代码-使用rabbitmq
共134个文件
jar:56个
java:33个
class:32个
5星 · 超过95%的资源 需积分: 34 48 下载量 140 浏览量
2017-07-28
19:15:29
上传
评论 3
收藏 18.15MB ZIP 举报
温馨提示
本资源与web端的比价网站是一体的,两个合并使用,即是一个完整的网站程序
资源推荐
资源详情
资源评论
收起资源包目录
比价网站京东苏宁数据爬取代码-使用rabbitmq (134个子文件)
TestJsoup.class 8KB
TestJsoup.class 6KB
Recv.class 4KB
Recv.class 4KB
Item.class 3KB
Item.class 3KB
Recv2.class 3KB
Recv.class 3KB
GetTmPrice.class 3KB
GetTmPrice.class 3KB
Recv2.class 3KB
Recv2.class 3KB
Recv.class 3KB
Recv.class 3KB
Recv2.class 2KB
JDBCCRUD.class 2KB
JDBCCRUD.class 2KB
Recv2.class 2KB
Send.class 2KB
Recv.class 2KB
Send.class 2KB
Send.class 2KB
Send.class 2KB
Send.class 2KB
Send.class 2KB
JDBCUtils.class 2KB
JDBCUtils.class 2KB
ConnectionUtil.class 898B
test.class 884B
test.class 880B
SpringMain.class 877B
Foo.class 703B
.classpath 5KB
guava-15.0.jar 2.07MB
aspectjweaver-1.8.4.jar 1.78MB
jackson-databind-2.4.2.jar 1.03MB
spring-context-4.1.3.RELEASE.jar 1003KB
spring-core-4.1.3.RELEASE.jar 983KB
mysql-connector-java-5.1.32.jar 946KB
spring-webmvc-4.1.3.RELEASE.jar 763KB
mybatis-3.2.8.jar 697KB
spring-web-4.1.3.RELEASE.jar 697KB
spring-beans-4.1.3.RELEASE.jar 691KB
hibernate-validator-5.1.3.Final.jar 619KB
httpclient-4.3.5.jar 577KB
joda-time-2.5.jar 574KB
commons-collections-3.2.1.jar 562KB
mysql-connector-java-5.0.8-bin.jar 528KB
log4j-1.2.16.jar 470KB
spring-jdbc-4.1.3.RELEASE.jar 417KB
amqp-client-3.5.1.jar 405KB
jstl-1.2.jar 405KB
commons-lang3-3.3.2.jar 403KB
spring-aop-4.1.3.RELEASE.jar 351KB
jedis-2.6.0.jar 323KB
spring-rabbit-1.4.0.RELEASE.jar 312KB
jsoup-1.9.1.jar 312KB
spring-messaging-4.1.2.RELEASE.jar 281KB
httpcore-4.3.2.jar 276KB
commons-codec-1.9.jar 258KB
spring-expression-4.1.3.RELEASE.jar 253KB
spring-tx-4.1.3.RELEASE.jar 246KB
jsqlparser-0.9.1.jar 246KB
commons-beanutils-1.9.2.jar 228KB
jackson-core-2.4.2.jar 220KB
commons-io-2.4.jar 181KB
commons-io-2.2.jar 170KB
bonecp-0.8.0.RELEASE.jar 108KB
spring-retry-1.1.2.RELEASE.jar 106KB
commons-pool2-2.0.jar 105KB
spring-amqp-1.4.0.RELEASE.jar 88KB
mapper-2.3.2.jar 84KB
commons-fileupload-1.3.1.jar 67KB
validation-api-1.1.0.Final.jar 62KB
commons-logging-1.1.3.jar 61KB
classmate-1.0.0.jar 59KB
jboss-logging-3.1.3.GA.jar 56KB
spring-aspects-4.1.3.RELEASE.jar 55KB
persistence-api-1.0.jar 51KB
mybatis-spring-1.2.2.jar 48KB
mybatis-paginator-1.2.15.jar 45KB
jackson-annotations-2.4.0.jar 38KB
httpmime-4.3.1.jar 36KB
pagehelper-3.4.2.jar 30KB
jt-common-0.0.1-SNAPSHOT.jar 29KB
slf4j-api-1.7.2.jar 25KB
slf4j-log4j12-1.6.4.jar 10KB
bonecp-spring-0.8.0.RELEASE.jar 6KB
aopalliance-1.0.jar 4KB
TestJsoup.java 8KB
TestJsoup.java 5KB
GetTmPrice.java 3KB
GetTmPrice.java 3KB
Item.java 2KB
Item.java 2KB
Recv.java 2KB
Recv.java 2KB
JDBCUtils.java 2KB
JDBCUtils.java 2KB
Recv2.java 2KB
共 134 条
- 1
- 2
资源评论
- kang32832382019-01-07下载少包,无法跑起来,代码可以借鉴
- fuxirjf2018-04-16楼主这个下载缺失很多包呀有没有完整的发一个给我呀120353560@qq.com
- zhangchengtian662017-11-02太好用了支持爱爸妈身心健康的程序员2017-12-13谢谢支持
爱爸妈身心健康的程序员
- 粉丝: 4
- 资源: 5
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功