package htmlbrowser;
import org.apache.http.HttpHost;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
/**
 * Downloads a page as a string with Apache HttpClient, either through an HTTP proxy or
 * directly from the local machine.
 *
 * <p>Timeout settings available on {@link RequestConfig}:
 * <ul>
 *   <li>{@code setConnectTimeout}: connection timeout, in milliseconds.</li>
 *   <li>{@code setConnectionRequestTimeout}: timeout for obtaining a connection from the
 *       connection manager, in milliseconds. A newer property that exists because connection
 *       pools can be shared; this class does not set it.</li>
 *   <li>{@code setSocketTimeout}: timeout for receiving data, in milliseconds. If the target
 *       returns no data within this time the call is abandoned.</li>
 * </ul>
 */
public class HttpResponseDemo {

    /** Connect and socket timeout used when going through a proxy, in milliseconds. */
    private static final int PROXY_TIMEOUT_MILLIS = 3000;

    /** Connect and socket timeout used for direct requests, in milliseconds. */
    private static final int DIRECT_TIMEOUT_MILLIS = 5000;

    /** Only responses with this status code have their body returned. */
    private static final int HTTP_OK = 200;

    /**
     * Fetches {@code url} through the given HTTP proxy.
     *
     * <p>This overload is best-effort: any {@link IOException} raised while executing the
     * request, reading the body or closing the connection results in {@code null}.
     *
     * @param url  address of the page to download
     * @param ip   host name or IP address of the proxy
     * @param port proxy port as a decimal string
     * @return the response body decoded as UTF-8, or {@code null} when the status code is not
     *         200 or an I/O error occurred
     * @throws NumberFormatException if {@code port} is not a parsable integer
     */
    public static String getHtml( String url, String ip, String port) {
        // Build everything that can reject its arguments before any client exists,
        // so a bad port or URL cannot leave an open client behind.
        HttpHost proxy = new HttpHost(ip, Integer.parseInt(port));
        RequestConfig config = RequestConfig.custom()
                .setProxy(proxy)
                .setConnectTimeout(PROXY_TIMEOUT_MILLIS)
                .setSocketTimeout(PROXY_TIMEOUT_MILLIS)
                .build();
        HttpGet httpGet = newBrowserLikeGet(url, config);
        try {
            return fetchBody(httpGet);
        } catch (IOException e) {
            // ClientProtocolException is an IOException; both mean "no page" to the caller.
            return null;
        }
    }

    /**
     * Overload of {@link #getHtml(String, String, String)} that fetches {@code url} directly,
     * without a proxy.
     *
     * @param url address of the page to download
     * @return the response body decoded as UTF-8, or {@code null} when the status code is not
     *         200 or an HTTP protocol error occurred
     * @throws ClassNotFoundException declared for backward compatibility with existing
     *         callers; nothing in this method throws it
     * @throws IOException if the connection fails, times out, or the body cannot be read
     */
    public static String getHtml(String url) throws ClassNotFoundException,
            IOException {
        RequestConfig config = RequestConfig.custom()
                .setConnectTimeout(DIRECT_TIMEOUT_MILLIS)
                .setSocketTimeout(DIRECT_TIMEOUT_MILLIS)
                .build();
        HttpGet httpGet = newBrowserLikeGet(url, config);
        try {
            return fetchBody(httpGet);
        } catch (ClientProtocolException e) {
            // Protocol errors are reported and turned into null; other IOExceptions propagate.
            e.printStackTrace();
            return null;
        }
    }

    /** Creates a GET request for {@code url} carrying the shared browser-like headers. */
    private static HttpGet newBrowserLikeGet(String url, RequestConfig config) {
        HttpGet httpGet = new HttpGet(url);
        httpGet.setConfig(config);
        httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;" +
                "q=0.9,image/webp,*/*;q=0.8");
        httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
        httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
        httpGet.setHeader("Cache-Control", "no-cache");
        httpGet.setHeader("Connection", "keep-alive");
        // NOTE(review): the Cookie and Host values below are fixed and are sent for every url
        // passed in, whatever its host - confirm callers only target www.xicidaili.com.
        httpGet.setHeader("Cookie", "_free_proxy_session=BAh7B0kiD3Nlc3Npb25faWQGOgZFVEkiJTRkYjMyM" +
                "TU3NGRjMWVhM2JlMDA5Y2IyNzZlZmVlZTYwBjsAVEkiEF9jc3JmX3Rva2VuBjsARkkiMUhtT0pjcnRT" +
                "bm9CZEllSXNTYkNZZWk2Nnp3NGNDcFFSQVFodzk1dmpLZWM9BjsARg%3D%3D--09d8736fbfb9a8544" +
                "b46eef48bb320c2b40ee721; Hm_lvt_0cf76c77469e965d2957f0553e6ecf59=1492128157,149" +
                "2160558,1492347839,1492764281; Hm_lpvt_0cf76c77469e965d2957f0553e6ecf59=1492764295");
        httpGet.setHeader("Host", "www.xicidaili.com");
        httpGet.setHeader("Pragma", "no-cache");
        httpGet.setHeader("Upgrade-Insecure-Requests", "1");
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " +
                "(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
        return httpGet;
    }

    /**
     * Executes {@code httpGet} on a fresh client and returns the UTF-8 body of a 200 response,
     * or {@code null} for any other status code.
     */
    private static String fetchBody(HttpGet httpGet) throws IOException {
        CloseableHttpClient httpClient = HttpClients.createDefault();
        // Nested try/finally: the response and the client are closed even when execute()
        // or the body read throws, not only on the success path.
        try {
            // Execute the GET request and obtain the response.
            CloseableHttpResponse httpResponse = httpClient.execute(httpGet);
            try {
                // Only a 200 response yields a body.
                if (httpResponse.getStatusLine().getStatusCode() != HTTP_OK) {
                    return null;
                }
                return EntityUtils.toString(httpResponse.getEntity(), "utf-8");
            } finally {
                httpResponse.close();
            }
        } finally {
            httpClient.close();
        }
    }
}
没有合适的资源?快使用搜索试试~ 我知道了~
Java网络爬虫抓取代理ip
共36个文件
class:10个
java:9个
xml:4个
需积分: 50 15 下载量 171 浏览量
2018-06-12
15:55:37
上传
评论
收藏 700KB ZIP 举报
温馨提示
通过Java网络爬虫爬取指定代理ip网上的ip,利用了jsoup、httpclient技术实现
资源推荐
资源详情
资源评论
收起资源包目录
ip_proxy.zip (36个子文件)
ip_proxy
src
main
resources
java
htmlbrowser
HttpResponseDemo.java 6KB
ipfiler
IPUtils.java 2KB
IPFilter.java 1KB
htmlparse
URLFecter.java 3KB
database
DataBaseDemo.java 4KB
timeutils
MyTimeJob.java 4KB
TimeUpdate.java 2KB
IpModel
DatabaseMessage.java 1KB
IPMessage.java 1013B
webapp
index.jsp 57B
WEB-INF
lib
mysql-connector-java-5.1.7-bin.jar 693KB
web.xml 222B
test
java
.classpath 959B
.settings
org.eclipse.wst.jsdt.ui.superType.container 49B
org.eclipse.wst.common.project.facet.core.xml 252B
org.eclipse.m2e.core.prefs 90B
org.eclipse.wst.jsdt.ui.superType.name 6B
org.eclipse.wst.common.component 749B
.jsdtscope 639B
org.eclipse.jdt.core.prefs 430B
org.eclipse.wst.validation.prefs 50B
pom.xml 1KB
target
classes
htmlbrowser
HttpResponseDemo.class 4KB
ipfiler
IPFilter$1.class 939B
IPFilter.class 1KB
IPUtils.class 4KB
htmlparse
URLFecter.class 3KB
database
DataBaseDemo.class 5KB
timeutils
TimeUpdate.class 3KB
MyTimeJob.class 4KB
IpModel
DatabaseMessage.class 1KB
IPMessage.class 1KB
test-classes
m2e-wtp
web-resources
META-INF
MANIFEST.MF 115B
maven
Ip_proxy
ip_proxy
pom.properties 215B
pom.xml 1KB
.project 1KB
共 36 条
- 1
资源评论
LoserOops
- 粉丝: 0
- 资源: 2
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
- (源码)基于ESP8266的WebDAV服务器与3D打印机管理系统.zip
- (源码)基于Nio实现的Mycat 2.0数据库代理系统.zip
- (源码)基于Java的高校学生就业管理系统.zip
- (源码)基于Spring Boot框架的博客系统.zip
- (源码)基于Spring Boot框架的博客管理系统.zip
- (源码)基于ESP8266和Blynk的IR设备控制系统.zip
- (源码)基于Java和JSP的校园论坛系统.zip
- (源码)基于ROS Kinetic框架的AGV激光雷达导航与SLAM系统.zip
- (源码)基于PythonDjango框架的资产管理系统.zip
- (源码)基于计算机系统原理与Arduino技术的学习平台.zip
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功