package com.gs.simple.impl.crawl;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.Socket;
import com.gs.simple.iface.crawl.ICrawl;
import com.gs.simple.pojos.CrawlResultPojo;
import com.gs.simple.pojos.UrlPojo;
/**
 * {@link ICrawl} implementation that fetches a page by writing a plain HTTP/1.0
 * GET request to a raw TCP socket on port 80 and reading back everything the
 * server sends until it closes the connection.
 */
public class SocketCrawlerImpl implements ICrawl {

    /** Port the crawler connects to; only plain (non-TLS) HTTP is spoken. */
    private static final int HTTP_PORT = 80;

    /** Charset name used for both the outgoing request and the incoming response. */
    private static final String CHARSET = "utf-8";

    /**
     * Fetches the resource described by {@code urlPojo}.
     *
     * <p>The page content of a successful result is the raw response as read
     * from the socket (status line and headers included), with every line
     * terminated by {@code '\n'}. The response is always decoded as UTF-8.
     *
     * @param urlPojo the URL to fetch; may be {@code null}
     * @return a result flagged successful with the response text, or a result
     *         flagged unsuccessful with {@code null} content when the input is
     *         {@code null}, has no URL or host, or an error occurs while
     *         talking to the server; never {@code null}
     */
    public CrawlResultPojo crawl(UrlPojo urlPojo) {
        if (urlPojo == null || urlPojo.getUrl() == null) {
            return failure();
        }
        String host = urlPojo.getHost();
        if (host == null) {
            return failure();
        }

        Socket socket = null;
        try {
            socket = new Socket(host, HTTP_PORT);

            BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
                    socket.getOutputStream(), CHARSET));
            // The protocol version token is "HTTP/1.0" (with a slash); a space
            // there makes the request line malformed.
            bw.write("GET " + urlPojo.getUrl() + " HTTP/1.0\r\n");
            bw.write("Host: " + host + "\r\n");
            bw.write("\r\n");
            bw.flush();

            BufferedReader br = new BufferedReader(new InputStreamReader(
                    socket.getInputStream(), CHARSET));
            StringBuilder stringBuilder = new StringBuilder();
            String line;
            // readLine() returns null once the server closes the connection.
            while ((line = br.readLine()) != null) {
                stringBuilder.append(line).append('\n');
            }

            CrawlResultPojo crawlResultPojo = new CrawlResultPojo();
            crawlResultPojo.setIsSuccess(true);
            crawlResultPojo.setPageContent(stringBuilder.toString());
            return crawlResultPojo;
        } catch (Exception e) {
            e.printStackTrace();
            // Report the failure the same way invalid input is reported,
            // instead of handing the caller a null.
            return failure();
        } finally {
            // Closing the socket also closes both of its streams, so this one
            // call releases everything even if a stream wrapper was never built.
            if (socket != null) {
                try {
                    socket.close();
                } catch (Exception e) {
                    e.printStackTrace();
                    System.out.println("流未正确关闭");
                }
            }
        }
    }

    /** Builds the result returned for every unsuccessful crawl. */
    private static CrawlResultPojo failure() {
        CrawlResultPojo crawlResultPojo = new CrawlResultPojo();
        crawlResultPojo.setIsSuccess(false);
        crawlResultPojo.setPageContent(null);
        return crawlResultPojo;
    }

    /**
     * Manual smoke test: crawls a fixed URL and prints a marker when finished.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SocketCrawlerImpl socketCrawlImpl = new SocketCrawlerImpl();
        UrlPojo urlPojo = new UrlPojo("http://www.baidu.com");
        socketCrawlImpl.crawl(urlPojo);
        System.out.println("down!");
    }
}
没有合适的资源?快使用搜索试试~ 我知道了~
基于Socket和HttpURLConnection的爬虫实例
共20个文件
class:8个
java:8个
prefs:2个
3星 · 超过75%的资源 需积分: 34 442 下载量 190 浏览量
2015-11-01
11:19:06
上传
评论 27
收藏 20KB ZIP 举报
温馨提示
基于Socket和HttpURLConnection的爬虫实例,适合初学者参考。
资源推荐
资源详情
资源评论
收起资源包目录
SimpleSpiderDemo2.zip (20个子文件)
SimpleSpiderDemo2
bin
com
gs
simple
iface
crawl
ICrawl.class 206B
control
SystemControler.class 1KB
manager
CrawlerManager.class 1KB
test
pojos
UrlPojo.class 2KB
CrawlResultPojo.class 1021B
enumeration
TaskLevel.class 1KB
impl
crawl
SocketCrawlerImpl.class 3KB
HttpUrlConnectionCrawlerImpl.class 3KB
test
com
gs
simple
test
.settings
org.eclipse.core.resources.prefs 57B
org.eclipse.jdt.core.prefs 598B
src
com
gs
simple
iface
crawl
ICrawl.java 201B
control
SystemControler.java 553B
manager
CrawlerManager.java 981B
pojos
CrawlResultPojo.java 618B
UrlPojo.java 1KB
enumeration
TaskLevel.java 84B
impl
crawl
SocketCrawlerImpl.java 2KB
HttpUrlConnectionCrawlerImpl.java 2KB
.project 393B
.classpath 344B
共 20 条
- 1
资源评论
- zzzzhhhhwwww 2015-11-20 还行,我觉得比较好。
- 问题不大的 2015-11-13 一般般,可以参考。
- qq_26431349 2015-11-26 刚刚入门,正在努力中0.0
- huawei002 2015-11-08 马马虎虎,不太好
- fengfengred 2015-12-03 还行吧,有点帮助
gaosh96
- 粉丝: 4
- 资源: 9
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功