package com;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
/**
 * Small scraper: sends a POST request to the Baidu home page using a randomly chosen
 * User-Agent, then downloads every {@code <img>} referenced by the returned document into a
 * fixed local directory. Files are named with consecutive numeric ids starting at 11110 and
 * always get a {@code .jpg} extension, whatever the real image format is.
 */
public class Tdd {

    /** Browser User-Agent strings; one of them is picked at random for the page request. */
    private static final String[] USER_AGENTS = {
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36 OPR/37.0.2178.32",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0)",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 BIDUBrowser/8.3 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 Core/1.47.277.400 QQBrowser/9.4.7658.400",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 UBrowser/5.6.12150.8 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36 TheWorld 7",
        // The original literal was truncated ("W…)") and contained a non-ASCII ellipsis, which is
        // not usable as a header value; restored to a well-formed Firefox 60 string.
        // NOTE(review): reconstruction is a best guess at the intended value.
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0"
    };

    /** Page that is fetched and scanned for images. */
    private static final String PAGE_URL = "https://www.baidu.com/";

    /** User-Agent sent with each individual image download. */
    private static final String IMAGE_USER_AGENT =
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36";

    /** Local directory (with trailing separator) the images are saved into; it must already exist. */
    private static final String DOWNLOAD_DIR = "C:\\Users\\Administrator\\Desktop\\pythonxm\\表情包\\";

    /** Id used for the first downloaded file; incremented after every successful download. */
    private static final int FIRST_IMAGE_ID = 11110;

    /** Size of the in-memory buffer used when copying image bytes to disk. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Fetches the page, lists its {@code <img>} elements and downloads each one.
     *
     * @param args ignored
     * @throws IOException if the page request, an image request or a file write fails; the first
     *     failure aborts the run
     */
    public static void main(String[] args) throws IOException {
        Random r = new Random();
        // Bound by the array length so every entry, including the last, can be selected.
        String userAgent = USER_AGENTS[r.nextInt(USER_AGENTS.length)];

        // NOTE(review): ":authority:" looks like an HTTP/2 pseudo-header name, but it is passed
        // to data(), i.e. as request data rather than as a header. Kept unchanged; confirm intent.
        Map<String, String> map = new HashMap<String, String>();
        map.put(":authority:", "https://www.baidu.com/");

        // Request the page: 5 second timeout, random User-Agent, Referer set to the site itself.
        Document doc = Jsoup.connect(PAGE_URL)
                .timeout(5000)
                .data(map)
                .ignoreContentType(true)
                .userAgent(userAgent)
                .header("referer", "https://www.baidu.com/")
                .post();

        // Select the elements of interest: every <img> tag in the document.
        Elements elements = doc.getElementsByTag("img");
        int id = FIRST_IMAGE_ID;
        for (Element element : elements) {
            System.out.println(element.toString());
            System.out.println(element.attr("src"));

            // Resolve against the document's base URI instead of blindly prefixing "https:":
            // that prefix only worked for protocol-relative ("//host/path") values and produced
            // broken URLs for absolute, root-relative or empty src attributes.
            String imageUrl = element.absUrl("src");
            if (!isHttpUrl(imageUrl)) {
                // Missing src, data: URI or other scheme: nothing to fetch over HTTP.
                System.err.println("Skipping image without a downloadable http(s) URL: " + element.attr("src"));
                continue;
            }

            download(new URL(imageUrl), DOWNLOAD_DIR + id + ".jpg");
            System.out.println(id + ".jpg下载完毕!!!");
            id++;
            System.out.println(element.toString());
        }
    }

    /** Returns true when {@code url} starts with the http or https scheme (case-insensitive). */
    private static boolean isHttpUrl(String url) {
        return url.regionMatches(true, 0, "http://", 0, 7)
                || url.regionMatches(true, 0, "https://", 0, 8);
    }

    /**
     * Downloads {@code source} and writes the response body to the file {@code destination}.
     * Both streams are closed and the connection is released even when the copy fails.
     *
     * @param source http(s) URL of the image
     * @param destination path of the file to create or overwrite
     * @throws IOException if the request fails or the file cannot be written
     */
    private static void download(URL source, String destination) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) source.openConnection();
        connection.setRequestProperty("User-Agent", IMAGE_USER_AGENT);
        // Same order as before: open the response stream first, then the target file, so a
        // failed request does not leave an empty file behind.
        try (InputStream in = connection.getInputStream();
                OutputStream out = new FileOutputStream(destination)) {
            // Copy in blocks; the previous byte-at-a-time loop issued one call per byte.
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        } finally {
            connection.disconnect();
        }
    }
}
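// Page-footer residue from the web post this code was copied from ("0 comments"); not part of the program.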