package com.zl.spyder;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.CookieStore;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.AbstractHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.zl.config.Consts;
import com.zl.dao.ILogin;
import com.zl.daoimpl.Login;
/**
* 教务系统爬虫主程序
*
* @author 郑龙
*
*/
public class JWSysCrawler {
/**
* Logger for this class.
*/
private static Log log = LogFactory.getLog(JWSysCrawler.class);
/**
* Login DAO; sendHttpClientPost uses it to check for, insert, or update the
* account record once a login has been verified.
*/
private static ILogin login = new Login();
/**
* CookieStore meant to hold cookies; starts out null.
*/
static CookieStore cookieStore = null;
// Cookie header value ("name=value;" pairs); rebuilt from the client's cookie
// store in sendHttpClientPost and sent as the "Cookie" header of the login POST.
public static StringBuilder sb = new StringBuilder();
// HTTP client shared by the requests issued from this class.
public static DefaultHttpClient client = new DefaultHttpClient();
static String cookie = null;
// Credentials handed to the login DAO in sendHttpClientPost.
// NOTE(review): where these are assigned is not visible in this part of the file.
private static String account, passwd;
/**
* URLs used by the crawler; all are empty until initURL is called.
*/
private static String mainURL = "";// home page URL
private static String loginURL = "";// login page URL
private static String codeURL = "";// captcha (verification code) page URL
private static String host = "";// host address
private static String index = "";// URL of the main page shown after login
private static String schedule = "";// timetable page URL
/**
 * Returns the HTTP client currently shared by this crawler.
 *
 * @return the static client instance
 */
public static DefaultHttpClient getClient() {
    final DefaultHttpClient shared = client;
    return shared;
}
/**
 * Replaces the HTTP client shared by this crawler.
 *
 * @param client
 *            the client instance that subsequent requests will use
 */
public static void setClient(final DefaultHttpClient client) {
    // The parameter shadows the static field, hence the class-qualified target.
    JWSysCrawler.client = client;
}
/**
 * (Required) Stores the URLs used by the crawler. Must be called before any
 * network request is issued, because all URL fields start out empty.
 *
 * @param main
 *            home page URL
 * @param login
 *            login page URL
 * @param code
 *            captcha (verification code) page URL
 * @param hosts
 *            host address
 * @param indexUrl
 *            URL of the main page shown after login
 * @param scheduleURL
 *            timetable page URL
 */
public static void initURL(String main, String login, String code, String hosts, String indexUrl,
        String scheduleURL) {
    // Server identity.
    host = hosts;
    mainURL = main;

    // Pages involved in logging in.
    loginURL = login;
    codeURL = code;

    // Pages reachable after a successful login.
    index = indexUrl;
    schedule = scheduleURL;
}
/**
 * Reads a byte stream to its end and decodes the collected bytes as text.
 * <p>
 * The stream is always closed before this method returns, including when a
 * read fails part-way through, so the underlying resource is not leaked.
 *
 * @param inputStream
 *            byte stream to drain; when {@code null} an empty string is
 *            returned
 * @param encode
 *            name of the charset used to decode the bytes (for example
 *            "GBK"); must not be {@code null}
 * @return the decoded content, or an empty string when the stream is
 *         {@code null}, a read fails, or the charset name is not supported
 */
private static String changInputStream(InputStream inputStream, String encode) {
    if (inputStream == null) {
        return "";
    }
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    byte[] chunk = new byte[1024];
    String result = "";
    try {
        int len;
        while ((len = inputStream.read(chunk)) != -1) {
            buffer.write(chunk, 0, len);
        }
        // An unknown charset name raises UnsupportedEncodingException, which is
        // an IOException and is therefore handled by the catch below.
        result = new String(buffer.toByteArray(), encode);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            inputStream.close();
        } catch (IOException ignored) {
            // Best-effort close: the content has already been read (or the
            // read failure was reported above), so there is nothing to recover.
        }
    }
    return result;
}
/**
 * Simulates the login and verifies that it worked.
 * <p>
 * Posts the form fields in {@code map} to {@code path}, rebuilds the shared
 * cookie string {@code sb} from the client's cookie store, then requests a
 * page that is only visible after login. When that page answers with status
 * 200 and its body is not exactly 125 characters long, the login is treated
 * as successful and the account is inserted into or updated in the database
 * through the login DAO.
 *
 * @param path
 *            URL the login form is posted to
 * @param map
 *            form fields to post; may be {@code null} or empty
 * @param encode
 *            charset name used both to encode the form and to decode the
 *            verification page
 * @param cookie
 *            not used by this method; the "Cookie" header is taken from the
 *            static {@code sb} instead
 * @return {@code true} when the verification page indicates a successful
 *         login; {@code false} otherwise, including when any I/O, protocol
 *         or encoding error occurs
 */
private static boolean sendHttpClientPost(String path, Map<String, String> map, String encode, String cookie) {
    List<NameValuePair> list = new ArrayList<NameValuePair>();
    if (map != null && !map.isEmpty()) {
        for (Map.Entry<String, String> entry : map.entrySet()) {
            list.add(new BasicNameValuePair(entry.getKey(), entry.getValue()));
        }
    }
    try {
        // Wrap the request parameters into a URL-encoded form body.
        UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list, encode);
        HttpPost httpPost = new HttpPost(path);
        httpPost.setHeader("Accept-Encoding", "gzip, deflate, sdch");
        httpPost.setHeader("User-Agent",
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36");
        httpPost.setHeader("Content-Type", "application/x-www-form-urlencoded");
        httpPost.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        httpPost.setHeader("Cookie", sb.toString());
        httpPost.setHeader("Referer", loginURL);
        httpPost.setHeader("Origin", mainURL);
        httpPost.setHeader("Host", host);
        httpPost.setHeader("Connection", "keep-alive");
        httpPost.setEntity(entity);
        // The login response itself is never read; only the cookies it sets matter.
        // NOTE(review): the abort presumably exists to release the connection
        // without consuming the response body - confirm.
        HttpResponse httpResponse = client.execute(httpPost);
        httpPost.abort();
        try {
            CookieStore set = client.getCookieStore();
            System.out.println("LOGIN cookie-->" + set.toString());
        } catch (Exception e) {
            e.printStackTrace();
            log.error("Method \"sendHttpClientPost\" Error! On " + new Date(), e);
        }
        // Rebuild the shared cookie string from whatever the client now holds.
        sb = new StringBuilder();
        List<Cookie> cookies = ((AbstractHttpClient) client).getCookieStore().getCookies();
        for (int j = 0; j < cookies.size(); j++) {
            sb.append(cookies.get(j).getName() + "=" + cookies.get(j).getValue() + ";");
        }
        System.out.print("成功后的Cookie---->" + sb.toString());
        // Request a page that is only visible after login: if it can be
        // accessed the login succeeded, otherwise it failed.
        HttpGet main1 = new HttpGet("http://210.42.72.73:888/jwweb/KSSW/stu_ksap_rpt.aspx");
        HttpResponse main1httpResponse = client.execute(main1);
        int status = main1httpResponse.getStatusLine().getStatusCode();
        // The body is read regardless of the status code, as before.
        String res = changInputStream(main1httpResponse.getEntity().getContent(), encode);
        if (status != 200) {
            return false;
        }
        // A body of exactly 125 characters is taken to be the "please log in"
        // prompt, i.e. the login did not succeed.
        // NOTE(review): magic length inherited from the original code - confirm
        // against the server's actual response.
        if (res.length() == 125) {
            return false;
        }
        /* Store the account and password in the database. */
        switch (login.isExistValue(account, passwd)) {
        case Login.INSERT_VALUE:
            int inserted = login.saveUserAccountInfo(account, passwd);
            System.out.println("\ninsert on :\t" + inserted);
            break;
        case Login.UPDATE_VALUE:
            int updated = login.updateAccountInfo(account, passwd);
            System.out.println("\nupdate on :\t" + updated);
            // This break is required: without it control falls through to
            // the default branch and "no change!" is printed after an update.
            break;
        default:
            System.out.println("\nno change!");
            break;
        }
        return true;// login succeeded
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        log.error("Method \"sendHttpClientPost\" Error! On " + new Date(), e);
    } catch (ClientProtocolException e) {
        e.printStackTrace();
        log.error("Method \"sendHttpClientPost\" Error! On " + new Date(), e);
    } catch (IOException e) {
        e.printStackTrace();
        log.error("Method \"sendHttpClientPost\" Error! On " + new Date(), e);
    }
    return false;
}
/**
* 获取考试安排表的方法
*/
public static boolean getStuExamInfo(Map<String, String> map) {
List<NameValuePair> list = new ArrayList<NameValuePair>();
if (map != null && !map.isEmpty()) {
for (Map.Entry<String, String> entry : map.entrySet()) {
list.add(new BasicNameValuePair(entry.getKey(), entry.getValue()));
}
}
try {
UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list, "GBK");
HttpPost httpPost = new HttpPost(Consts.stuExamURL);
/* 设置一些请求头参数 */
httpPost.setHeader("Accept-Encoding", "deflate, sdch");// 这里不能接受gzip编码,否则后面对网页体的解析会出