package crawler.sina.utils;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.HttpVersion;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.conn.params.ConnRouteParams;
import org.apache.http.cookie.Cookie;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.ExecutionContext;
import org.apache.http.protocol.HTTP;
import org.apache.http.protocol.HttpContext;
import crawler.sina.craw.CrawSina;
import crawler.sina.login.Constant;
/**
* http操作相关的类
*/
public class HttpUtils {
/*
* params :
* url: 地址
* headers请求头部信息
* return : httpresponse响应
*/
/**
 * Sends an HTTP GET request and returns the raw response.
 *
 * <p>Failures are not propagated: an exception thrown while sending the request is
 * reported on standard output/error and the method returns whatever response had been
 * obtained so far, which is {@code null} when the request itself failed.
 *
 * @param url     address to request
 * @param headers request headers to add; may be {@code null} or empty
 * @return the response, or {@code null} if the request could not be executed
 */
public static HttpResponse doGet(String url,Map<String,String> headers){
    HttpClient client=createHttpClient();
    HttpGet getMethod=new HttpGet(url);
    HttpResponse response=null;
    HttpContext httpContext = new BasicHttpContext();
    try {
        if(headers!=null){
            for(Map.Entry<String,String> header:headers.entrySet()){
                getMethod.addHeader(header.getKey(), header.getValue());
            }
        }
        // The context has to be handed to execute(); otherwise HttpClient never fills it
        // in and the HTTP_REQUEST lookup below always yields null.
        response=client.execute(getMethod, httpContext);
        // Print the URI of the request as recorded by HttpClient in the context.
        Object realRequest = httpContext.getAttribute(ExecutionContext.HTTP_REQUEST);
        if(realRequest instanceof HttpUriRequest){
            System.out.println(((HttpUriRequest)realRequest).getURI());
        }
    } catch (ClientProtocolException e) {
        e.printStackTrace();
    } catch (IOException e) {
        // getMessage() may be null, so guard before inspecting it.
        String msg=e.getMessage();
        if(msg!=null && msg.contains("Truncated chunk")){
            System.out.println(msg +" 数据获取不完整,需要重新获取。");
        }else{
            System.out.println(msg +" 连接被拒绝,需要降低爬取频率。");
        }
    } catch(Exception e){
        // Keep the best-effort contract (never throw to the caller) but do not hide the cause.
        e.printStackTrace();
    }
    return response;
}
/*
* params :
* url: 地址
* headers:请求头部信息
* params:post的请求数据
* return : httpresponse响应
*/
/**
 * Sends an HTTP POST request whose body is a URL-encoded form.
 *
 * <p>No entity is attached when {@code params} is {@code null} or empty.
 * {@link ClientProtocolException} and {@link IOException} are printed via
 * {@code printStackTrace()} and result in a {@code null} return value.
 *
 * @param url     address to post to
 * @param headers request headers to add; may be {@code null} or empty
 * @param params  form fields to send, encoded as UTF-8; may be {@code null} or empty
 * @return the response, or {@code null} if the request failed
 */
public static HttpResponse doPost(String url,Map<String,String> headers,Map<String,String> params){
    HttpClient httpClient=createHttpClient();
    HttpPost request=new HttpPost(url);
    HttpResponse result=null;
    try {
        if(headers!=null){
            for(Map.Entry<String,String> header:headers.entrySet()){
                request.addHeader(header.getKey(), header.getValue());
            }
        }
        if(params!=null && !params.isEmpty()){
            // Convert the field map into the name/value pairs the form entity expects.
            List<NameValuePair> form=new ArrayList<NameValuePair>();
            for(Map.Entry<String,String> field:params.entrySet()){
                form.add(new BasicNameValuePair(field.getKey(),field.getValue()));
            }
            request.setEntity(new UrlEncodedFormEntity(form,HTTP.UTF_8));
        }
        result=httpClient.execute(request);
    } catch (ClientProtocolException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return result;
}
//上传一个文件
/**
 * Uploads a single file with an HTTP POST request.
 *
 * <p>The request body is assembled by hand: a boundary line, a
 * {@code Content-Disposition} / {@code Content-Type} part header, the file content passed
 * through {@code Base64Encoder.encode}, and a closing boundary line. The boundary is the
 * text after the first {@code '='} of the {@code Content-Type} request header; if no such
 * header is supplied the boundary is the empty string. The part header and the complete
 * body are also printed to standard output.
 *
 * <p>{@link ClientProtocolException} and {@link IOException} (including a missing file)
 * are printed via {@code printStackTrace()} and result in a {@code null} return value.
 *
 * @param url      address to post to
 * @param headers  request headers to add; may be {@code null} or empty
 * @param fileName path of the file to upload
 * @return the response, or {@code null} if the request failed
 */
public static HttpResponse doPost(String url,Map<String,String> headers,String fileName){
    HttpClient client=createHttpClient();
    HttpPost postMethod=new HttpPost(url);
    String boundary = "";
    HttpResponse response=null;
    try {
        if(headers!=null){
            for(Map.Entry<String,String> header:headers.entrySet()){
                String key=header.getKey();
                String value=header.getValue();
                postMethod.addHeader(key, value);
                // HTTP header names are case-insensitive, so accept any capitalisation.
                if("Content-Type".equalsIgnoreCase(key) && value!=null){
                    boundary=value.substring(value.indexOf("=")+1);
                }
            }
        }
        File file=new File(fileName);
        StringBuilder buffer=new StringBuilder();
        String tmpstr;
        InputStream in=new FileInputStream(file);
        try {
            buffer.append(boundary).append("\n")
                .append("Content-Disposition: form-data; name=\"pic1\"; filename=\""+file.getName()).append("\"\n")
                .append("Content-Type: image/pjpeg").append("\n")
                .append("\n");
            System.out.println(buffer.toString());
            // NOTE(review): the file is read as text and re-encoded with the platform
            // charset before Base64 encoding; for binary images this presumably alters
            // the bytes. Confirm what Utils.getStringFromStream does before changing it.
            tmpstr=Utils.getStringFromStream(in);
        } finally {
            // Always release the file handle, whether or not reading succeeded.
            in.close();
        }
        tmpstr=Base64Encoder.encode(tmpstr.getBytes());
        buffer.append(tmpstr).append("\n");
        buffer.append(boundary+"--").append("\n");
        String body=buffer.toString();
        System.out.println(body);
        // Encode once so the stream content and the declared length always agree.
        byte[] bodyBytes=body.getBytes();
        InputStreamEntity ise=new InputStreamEntity(new ByteArrayInputStream(bodyBytes),bodyBytes.length);
        postMethod.setEntity(ise);
        response=client.execute(postMethod);
    } catch (ClientProtocolException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return response;
}
/*
* params :
* httpresponse
* return : 响应的头部信息
*/
/**
 * Collects all headers of a response into a list.
 *
 * @param response the response to read headers from; must not be {@code null}
 * @return a new list holding every header in the order reported by
 *         {@code response.getAllHeaders()}, or {@code null} when there are none
 */
public static List<Header> getReponseHeaders(HttpResponse response){
    Header[] all=response.getAllHeaders();
    if(all==null || all.length==0){
        // Callers receive null, not an empty list, when the response has no headers.
        return null;
    }
    List<Header> result=new ArrayList<Header>();
    for(Header header:all){
        result.add(header);
    }
    return result;
}
/*
* params :
* headers:头部信息
* request:请求
*/
/**
 * Adds every entry of a header map to a request.
 *
 * @param headers header names mapped to values; {@code null} or empty means nothing is added
 * @param request the request that receives the headers
 */
public static void setHeaders(Map<String,String> headers,HttpUriRequest request){
    if(headers==null || headers.isEmpty()){
        return;
    }
    for(Map.Entry<String,String> entry:headers.entrySet()){
        request.addHeader(entry.getKey(), entry.getValue());
    }
}
/*
* params :
* httpresponse
* return : 响应的cookies值
*/
public static List<Cookie> getResponseCookies(HttpResponse response){
List<Cookie> cookies=null;
CrawSina.Cookie = "";
Header[] hds=response.getAllHeaders();
if(hds!=null && hds.length>0){
for(int i=0;i<hds.length;i++){
if(hds[i].getName().equalsIgnoreCase("Set-Cookie")){
if(cookies==null){
cookies=new ArrayList<Cookie>();
}
- 1
- 2
- 3
前往页