package action;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.List;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.http.Consts;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.params.ClientPNames;
import org.apache.http.client.params.CookiePolicy;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.cookie.Cookie;
import org.apache.http.entity.mime.HttpMultipartMode;
import org.apache.http.entity.mime.MultipartEntity;
import org.apache.http.entity.mime.content.FileBody;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import service.ImageParseService;
import service.impl.RuokuaiHttpApiImpl;
import common.PageElement;
public class Test5
{
/**
 * Shared HTTP client used by the static helpers of this class.
 * imgCookie() replaces it with the result of wrapClient(httpclient) and reads
 * its cookie store after the captcha request.
 */
public static DefaultHttpClient httpclient = new DefaultHttpClient();
/** Home page URL. Not referenced in the visible part of this file. */
public static String index_url = "https://www.sgs.gov.cn/notice/home";
/** URL that getSessionToken() reads the form token from and loginCookie() posts to. */
public static String login_url = "https://www.sgs.gov.cn/notice/search/ent_info_list";
/** URL requested by imgCookie() to obtain the captcha image. */
public static String img_path_url = "https://www.sgs.gov.cn/notice/search/popup_captchas";
/** Local path that imgCookie() passes to download() as the target for the captcha image. */
public static String image_save_path = "d://vcode.jpg";
/** Upload endpoint. Not referenced in the visible part of this file; presumably used further down (TODO confirm). */
public static String update_url = "http://upload.zhuanmi.net/web/upload.do";
/** Account identifier. Not referenced in the visible part of this file (TODO confirm usage). */
public static String userid = "1883879";
/** File-modification endpoint. Not referenced in the visible part of this file (TODO confirm usage). */
public static String modify_file_url = "http://newhome.400gb.com/iajax.php?item=file_act";
/** File-deletion endpoint; the trailing "file_id=" suggests callers append an id (TODO confirm). */
public static String delete_file_url = "http://newhome.400gb.com/iajax.php?item=file_act&action=file_delete&task=file_delete&file_id=";
/**
 * Class logger. Bound to Test5 itself so that log records are attributed to
 * this class rather than to an unrelated one.
 */
private static final Logger log = LoggerFactory.getLogger(Test5.class);
/** The single Test5 object, created when the class is initialised. */
private static final Test5 instance = new Test5();

/**
 * Gives access to the one shared Test5 object.
 *
 * @return the same instance on every call
 */
public static Test5 getInstance()
{
    return Test5.instance;
}

/** Private so that no instance other than the shared one can be created from outside. */
private Test5()
{
    // nothing to initialise
}
/**
 * Runs the flow end to end: fetch the captcha image and its cookies,
 * have the image recognised, then submit the search/login request.
 *
 * @param args unused
 * @throws IllegalStateException if the captcha image could not be fetched or
 *         the recognition result is missing or too short to be trimmed
 * @throws Exception declared for compatibility with existing callers
 */
public static void main(String[] args) throws Exception
{
    String cookie = imgCookie();
    if (cookie == null)
    {
        // imgCookie() returns null only after a failure; continuing would
        // recognise whatever stale image is left on disk.
        throw new IllegalStateException("Failed to fetch the captcha image and its cookies");
    }
    System.out.println("img cookie " + cookie);

    String code = identifyImg();
    if (code == null || code.length() < 2)
    {
        // Without this check the substring call below fails with an
        // uninformative NullPointerException / StringIndexOutOfBoundsException.
        throw new IllegalStateException("Captcha recognition returned no usable result: " + code);
    }
    // Drop the first and last character of the recognition result
    // (presumably surrounding delimiters such as quotes - TODO confirm).
    String captcha = code.substring(1, code.length() - 1);
    cookie = loginCookie(captcha, cookie);
}
/**
 * Requests the captcha image, stores it at image_save_path and returns the
 * cookies now held by the shared client, formatted as a Cookie header value.
 *
 * The shared httpclient is first replaced by wrapClient(httpclient) and given
 * a 20 second connect timeout; the request itself uses a 20 second socket
 * timeout.
 *
 * @return "name=value" pairs joined with "; " (empty string when the cookie
 *         store is empty), or null if any step failed
 */
public static String imgCookie()
{
    HttpGet httpGet = null;
    try
    {
        httpclient = (DefaultHttpClient) wrapClient(httpclient);
        httpclient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 20000);
        httpGet = new HttpGet(img_path_url);
        httpGet.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT, 20000);
        HttpResponse response = httpclient.execute(httpGet);

        HttpEntity entity = response.getEntity();
        if (entity == null)
        {
            throw new IOException("Captcha response from " + img_path_url + " has no body");
        }
        // Save the captcha image to disk.
        download(entity.getContent(), image_save_path);

        List<Cookie> cookies = httpclient.getCookieStore().getCookies();
        System.out.println(cookies.size());
        StringBuilder cookiesSB = new StringBuilder();
        System.out.println("第一次cookie");
        if (cookies.isEmpty())
        {
            System.out.println("None");
        } else
        {
            for (Cookie c : cookies)
            {
                // A Cookie header separates pairs with "; "; without it two
                // cookies would run together as "a=1b=2".
                if (cookiesSB.length() > 0)
                {
                    cookiesSB.append("; ");
                }
                cookiesSB.append(c.getName()).append("=").append(c.getValue());
                System.out.println(cookiesSB.toString());
            }
        }
        return cookiesSB.toString();
    } catch (Exception e)
    {
        e.printStackTrace();
        return null;
    } finally
    {
        // Hand the connection back even when the download failed part-way.
        if (httpGet != null)
        {
            httpGet.releaseConnection();
        }
    }
}
/**
 * Sends the saved captcha image to the recognition service and returns its answer.
 *
 * The image is read from image_save_path, the same location imgCookie()
 * downloads it to, so both steps always agree on the file. Recognition is
 * delegated to RuokuaiHttpApiImpl.getInstance().readFromImage(File).
 *
 * @return the recognition result exactly as returned by the service; no
 *         validation is applied, so it may be null if the service returns null
 */
public static String identifyImg()
{
    ImageParseService biz = RuokuaiHttpApiImpl.getInstance();
    String str = biz.readFromImage(new File(image_save_path));
    System.out.println(str);
    return str;
}
/**
 * Extracts the value of the "session.token" form input from the HTML that
 * getHtmlByUrl(login_url, cookie, httpclient) returns.
 *
 * @param cookie cookie string handed through to getHtmlByUrl
 * @return the "value" attribute of the last matching input, or null when the
 *         HTML is null/empty or contains no such input
 */
public static String getSessionToken(String cookie)
{
    final String page = getHtmlByUrl(login_url, cookie, httpclient);
    if (page == null || "".equals(page))
    {
        return null;
    }

    final Elements tokenInputs = Jsoup.parse(page).select("input[name=session.token]");
    if (tokenInputs.isEmpty())
    {
        return null;
    }

    // When several inputs match, the last one wins.
    final Element lastInput = tokenInputs.get(tokenInputs.size() - 1);
    return lastInput.attr("value");
}
public static String loginCookie(String code, String cookie)
{
try
{
//httpclient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 20000);
// 2 用户登录
HttpPost httppost = new HttpPost(login_url);
String html = getHtmlByUrl(login_url,cookie,httpclient);
String href = null;
String title = null;
if (html != null && !"".equals(html)) {
Document doc = Jsoup.parse(html);
Elements linksElements = doc.select("input[name=session.token]");
for (Element ele : linksElements) {
href = ele.attr("value");
title = ele.text();
// System.out.println(href+","+title);
}
}
System.out.println(href);
httppost.setHeader("Cookie", cookie);
List<NameValuePair> nvps = new ArrayList<NameValuePair>();
nvps.add(new BasicNameValuePair("searchType", "1"));
nvps.add(new BasicNameValuePair("captcha", "星火燎原"));
nvps.add(new BasicNameValuePair("session.token","4159a584-f783-4d4a-bf7a-4e9c32854e3d"));
nvps.add(new BasicNameValuePair("condition.keyword", "上海
没有合适的资源?快使用搜索试试~ 我知道了~
针对 httpclient4.* 绕验证码获取公司信息 包括 jsoup网页信息的爬虫及htmlUnit对动态网站信息的抓取
共122个文件
jar:55个
class:25个
java:17个
4星 · 超过85%的资源 需积分: 13 138 下载量 152 浏览量
2015-04-20
11:04:23
上传
评论 4
收藏 54.66MB ZIP 举报
温馨提示
针对 httpclient4.* 绕验证码获取公司信息 包括 jsoup网页信息的爬虫及htmlUnit对动态网站信息的抓取
资源推荐
资源详情
资源评论
收起资源包目录
针对 httpclient4.* 绕验证码获取公司信息 包括 jsoup网页信息的爬虫及htmlUnit对动态网站信息的抓取 (122个子文件)
Test5.class 20KB
TestJS.class 13KB
Demo5.class 11KB
Test.class 11KB
HttpClientHelper.class 7KB
PageElement.class 7KB
PicDownload.class 7KB
Index.class 4KB
Tess4jImpl.class 4KB
Configuration.class 3KB
RuokuaiHttpApiImpl.class 3KB
CommonImpl.class 2KB
CommonImpl.class 2KB
Index.class 1KB
TAdmin.class 1KB
PageElement$1.class 948B
TestJS$1.class 928B
Test5$1.class 924B
Demo5$1.class 924B
CaptchaUtil.class 880B
Condition.class 523B
Session.class 511B
ICommon.class 373B
ICommon.class 373B
ImageParseService.class 217B
.classpath 4KB
org.eclipse.wst.common.component 450B
org.eclipse.wst.jsdt.ui.superType.container 49B
tess4j-1.4.1.jar 29.04MB
hibernate3.jar 3.95MB
xalan-2.7.1.jar 3.03MB
ojdbc-6.jar 2.59MB
aspectjweaver-1.5.3.jar 1.8MB
htmlunit-2.14.jar 1.38MB
xercesImpl-2.11.0.jar 1.3MB
htmlunit-core-js-2.14.jar 1002KB
freemarker-2.3.16.jar 860KB
org.springframework.context-3.1.0.M2.jar 791KB
struts2-core-2.2.3.jar 756KB
httpclient-4.4.jar 702KB
javassist-3.12.0.GA.jar 618KB
xwork-core-2.2.3.jar 602KB
c3p0-0.9.1.jar 594KB
org.springframework.beans-3.1.0.M2.jar 571KB
commons-collections-3.1.jar 546KB
mysql-connector-java-5.0.8-bin.jar 528KB
org.springframework.web-3.1.0.M2.jar 480KB
antlr-2.7.6.jar 430KB
org.springframework.core-3.1.0.M2.jar 420KB
commons-lang3-3.3.2.jar 403KB
org.springframework.jdbc-3.1.0.M2.jar 389KB
cssparser-0.9.13.jar 347KB
org.springframework.orm-3.1.0.M2.jar 344KB
org.springframework.aop-3.1.0.M2.jar 322KB
httpcore-4.4.jar 314KB
dom4j-1.6.1.jar 307KB
jetty-util-8.1.14.v20131031.jar 281KB
jsoup-1.6.1.jar 275KB
commons-lang-2.5.jar 273KB
serializer-2.7.1.jar 272KB
commons-codec-1.9.jar 258KB
org.springframework.transaction-3.1.0.M2.jar 241KB
ognl-3.0.1.jar 224KB
xml-apis-1.4.01.jar 215KB
org.springframework.expression-3.1.0.M2.jar 172KB
commons-io-2.0.1.jar 156KB
jdom.jar 149KB
jetty-websocket-8.1.14.v20131031.jar 125KB
nekohtml-1.9.20.jar 122KB
jetty-io-8.1.14.v20131031.jar 102KB
hibernate-jpa-2.0-api-1.0.1.Final.jar 100KB
jetty-http-8.1.14.v20131031.jar 94KB
commons-logging-1.1.1 - 副本.jar 59KB
commons-logging-1.1.1.jar 59KB
commons-fileupload-1.2.2.jar 58KB
org.springframework.asm-3.1.0.M2.jar 52KB
httpmime-4.4.jar 40KB
slf4j-api-1.7.10.jar 31KB
slf4j-api-1.6.1.jar 25KB
struts2-spring-plugin-2.2.3.jar 21KB
sac-1.3.jar 15KB
jta-1.1.jar 11KB
aopalliance-1.0.jar 4KB
Test5.java 27KB
TestJS.java 15KB
Demo5.java 13KB
Test.java 11KB
PageElement.java 6KB
PicDownload.java 6KB
HttpClientHelper.java 5KB
Index.java 4KB
Tess4jImpl.java 3KB
Configuration.java 2KB
RuokuaiHttpApiImpl.java 2KB
CommonImpl.java 970B
CaptchaUtil.java 608B
Condition.java 510B
Session.java 486B
ICommon.java 432B
ImageParseService.java 402B
共 122 条
- 1
- 2
li_yaya
- 粉丝: 17
- 资源: 7
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功
- 1
- 2
前往页