import java.awt.Graphics;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.List;
import javax.imageio.ImageIO;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlImage;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlParagraph;
import com.sun.image.codec.jpeg.JPEGCodec;
import com.sun.image.codec.jpeg.JPEGImageEncoder;
/**
 * Small HtmlUnit-based scraper.
 *
 * <p>Run without arguments it prints the text and href of every link found inside
 * {@code <div class="f12list">} on the list page. Run with one argument (an article URL)
 * it prints the article title and paragraphs and stores a 200x100 JPEG thumbnail of the
 * first image in the article body under {@code c:/Unos/}.
 *
 * <p>The thumbnail is encoded with {@code javax.imageio.ImageIO}; the internal
 * {@code com.sun.image.codec.jpeg} encoder is no longer used by this class.
 */
public class GetDate {
    /** Page whose "f12list" divisions hold the article links. */
    private static final String LIST_URL = "http://en.beijing2008.cn/spectators/beijing/shopping/list/";
    /** Directory the thumbnails are written to. */
    private static final String OUTPUT_DIR = "c:/Unos/";
    private static final int THUMB_WIDTH = 200;
    private static final int THUMB_HEIGHT = 100;

    /**
     * Entry point.
     *
     * @param args empty to list the article links; otherwise {@code args[0]} is the
     *             URL of the article to fetch
     * @throws Exception if a page cannot be loaded or the thumbnail cannot be written
     */
    public static void main(String[] args) throws Exception {
        GetDate getDate = new GetDate();
        if (args.length == 0) {
            getDate.getUrl();
        } else {
            getDate.getInfo(args[0]);
        }
    }

    /**
     * Prints, for every anchor inside a {@code div} whose class attribute is exactly
     * {@code f12list}, the anchor text followed by its href attribute.
     *
     * @throws Exception if the list page cannot be loaded
     */
    public void getUrl() throws Exception {
        WebClient webClient = new WebClient();
        HtmlPage page = (HtmlPage) webClient.getPage(LIST_URL);

        @SuppressWarnings("unchecked")
        List<HtmlDivision> divisions =
                (List<HtmlDivision>) page.getDocumentElement().getHtmlElementsByTagName("div");
        for (HtmlDivision division : divisions) {
            // Constant-first equals avoids an NPE should the attribute ever be null.
            if (!"f12list".equals(division.getClassAttribute())) {
                continue;
            }
            @SuppressWarnings("unchecked")
            List<HtmlAnchor> anchors = (List<HtmlAnchor>) division.getHtmlElementsByTagName("a");
            for (HtmlAnchor anchor : anchors) {
                System.out.println(anchor.asText());
                System.out.println(anchor.getHrefAttribute());
            }
        }
    }

    /**
     * Loads an article page, saves a thumbnail of its first image and prints the
     * title followed by the text of every paragraph in the content element.
     *
     * <p>The page is expected to contain elements with ids {@code print_title} and
     * {@code newsContent}. If the content has no image, a note is written to
     * {@code System.err} and only the text is printed.
     *
     * @param url address of the article page
     * @throws Exception if the page cannot be loaded, a required element is missing,
     *                   or the image cannot be read or written
     */
    public void getInfo(String url) throws Exception {
        WebClient webClient = new WebClient();
        HtmlPage page = (HtmlPage) webClient.getPage(url);
        HtmlElement titleElement = page.getHtmlElementById("print_title");
        HtmlElement contentElement = page.getHtmlElementById("newsContent");
        String title = titleElement.asText();

        @SuppressWarnings("unchecked")
        List<HtmlParagraph> paragraphs =
                (List<HtmlParagraph>) contentElement.getHtmlElementsByTagName("p");

        List<?> images = contentElement.getHtmlElementsByTagName("IMG");
        if (images.isEmpty()) {
            System.err.println("No image found in article: " + url);
        } else {
            HtmlImage firstImage = (HtmlImage) images.get(0);
            // Resolve against the page address so relative src attributes work too.
            URL imageUrl = new URL(new URL(url), firstImage.getSrcAttribute());
            saveThumbnail(imageUrl, title);
        }

        System.out.println(title);
        for (HtmlParagraph paragraph : paragraphs) {
            System.out.println(paragraph.asText());
        }
    }

    /** Downloads the image, scales it to the thumbnail size and writes it as a JPEG file. */
    private void saveThumbnail(URL imageUrl, String title) throws IOException {
        Image source;
        InputStream in = imageUrl.openStream();
        try {
            source = ImageIO.read(in);
        } finally {
            in.close();
        }
        if (source == null) {
            // ImageIO.read returns null when no registered reader understands the data.
            throw new IOException("Unsupported or unreadable image: " + imageUrl);
        }

        BufferedImage thumbnail =
                new BufferedImage(THUMB_WIDTH, THUMB_HEIGHT, BufferedImage.TYPE_INT_RGB);
        Graphics graphics = thumbnail.getGraphics();
        try {
            graphics.drawImage(source, 0, 0, THUMB_WIDTH, THUMB_HEIGHT, null);
        } finally {
            graphics.dispose();
        }

        File directory = new File(OUTPUT_DIR);
        if (!directory.isDirectory() && !directory.mkdirs()) {
            throw new IOException("Cannot create output directory: " + directory);
        }
        File target = new File(directory, toSafeFileName(title) + ".jpg");
        FileOutputStream out = new FileOutputStream(target);
        try {
            if (!ImageIO.write(thumbnail, "jpg", out)) {
                throw new IOException("No JPEG writer available for " + target);
            }
        } finally {
            out.close();
        }
    }

    /** Replaces characters that are not allowed in Windows file names with underscores. */
    private static String toSafeFileName(String title) {
        String cleaned = title.replaceAll("[\\\\/:*?\"<>|\\p{Cntrl}]", "_").trim();
        return cleaned.length() == 0 ? "untitled" : cleaned;
    }
}
html_getData.rar_getdata_htmlutil 使用_获取网页
版权申诉
65 浏览量
2022-09-19
13:56:32
上传
评论
收藏 7.02MB RAR 举报
alvarocfc
- 粉丝: 105
- 资源: 1万+
最新资源
- 筷手引流工具.apk
- 论文(最终)_20240430235101.pdf
- 基于python编写的Keras深度学习框架开发,利用卷积神经网络CNN,快速识别图片并进行分类
- 最全空间计量实证方法(空间杜宾模型和检验以及结果解释文档).txt
- 5uonly.apk
- 蓝桥杯Python组的历年真题
- 2023-04-06-项目笔记 - 第一百十九阶段 - 4.4.2.117全局变量的作用域-117 -2024.04.30
- 2023-04-06-项目笔记 - 第一百十九阶段 - 4.4.2.117全局变量的作用域-117 -2024.04.30
- 前端开发技术实验报告:内含4四实验&实验报告
- Highlight Plus v20.0.1
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈