package MultiHandling;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import Downloadandsave.DownloadURLFile;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
/**
 * Worker that consumes paper page URLs (one per line) from the shared {@code report.txt}
 * reader, scans each page for {@code href} targets containing {@link #myDomain}, and passes
 * every hit to {@code DownloadURLFile.downloadFromUrl} with the target directory
 * {@code "Test" + File.separator + <text after "/paper/" in the page URL>}.
 *
 * <p>The reader is static, so every instance and every thread draws lines from the same file;
 * access to it is serialized so each line is claimed by exactly one worker. Only the line read
 * happens under the lock; page fetching and downloading run outside it.
 *
 * <p>{@link #run()} keeps its per-paper state in local variables, so one instance may be handed
 * to several threads. The public {@link #prepareDownFiles(String)} and
 * {@link #downFiles(List)} entry points still operate on instance fields and are not
 * synchronized.
 */
public class MultiThread implements Runnable {
    /** Path segment that precedes the paper identifier in a page URL. */
    private static final String PAPER_MARKER = "/paper/";
    /** Prefix prepended to site-relative links found on a page. */
    private static final String SITE_ROOT = "https://papers.nips.cc";
    /** Pause between scanning a page and downloading its links, in milliseconds. */
    private static final long DOWNLOAD_DELAY_MS = 3000;

    /** Shared input of page URLs; stays {@code null} when report.txt cannot be opened. */
    private static BufferedReader br = null;
    /** Accumulator used by direct callers of {@link #prepareDownFiles(String)}. */
    private List<String> list = new ArrayList<String>();
    /** Target sub-directory used by direct callers of {@link #downFiles(List)}. */
    private String paperdir = "";
    /** Regex fragment a link must contain to be collected; spliced into the pattern as-is. */
    String myDomain = "/paper/";

    static {
        try {
            // NOTE(review): the 10-character buffer is kept from the original; the reason
            // for such a small size is not visible in this file.
            br = new BufferedReader(new FileReader("report.txt"), 10);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Processes lines from the shared reader until it is exhausted, a read fails, or the
     * thread is interrupted. Blank lines and lines without {@code "/paper/"} are skipped.
     */
    @Override
    public void run() {
        if (br == null) {
            // The static initializer already printed the cause; locking on null would throw.
            System.err.println("report.txt could not be opened; nothing to download");
            return;
        }
        while (!Thread.currentThread().isInterrupted()) {
            String line = nextLine();
            if (line == null) {
                return;
            }
            int markerAt = line.indexOf(PAPER_MARKER);
            if (markerAt < 0) {
                // Without the marker there is no directory name to derive; skip the line
                // instead of failing in substring or inventing a path.
                if (line.trim().length() > 0) {
                    System.err.println("Skipping line without " + PAPER_MARKER + ": " + line);
                }
                continue;
            }
            String dir = line.substring(markerAt + PAPER_MARKER.length());
            System.out.println("===============准备开始下载 " + dir + " 的全文及相关资源 ================");
            List<String> urls = new ArrayList<String>();
            collectLinks(line, urls);
            try {
                Thread.sleep(DOWNLOAD_DELAY_MS);
                download(urls, dir);
            } catch (InterruptedException e) {
                // Restore the flag so whoever owns this thread can observe the interruption.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Passes every URL in {@code list} to {@code DownloadURLFile.downloadFromUrl}, using the
     * directory derived from the instance's current paper directory field.
     *
     * @param list URLs to download
     * @throws InterruptedException declared for compatibility with existing callers
     */
    public void downFiles(List<String> list) throws InterruptedException {
        download(list, paperdir);
    }

    /**
     * Fetches the page at {@code line} and appends every matching link to this instance's
     * internal list. I/O and URL-syntax errors are printed and otherwise ignored; links found
     * before the error are kept.
     *
     * @param line URL of the page to scan
     */
    public void prepareDownFiles(String line) {
        collectLinks(line, list);
    }

    /** Claims the next line of the shared reader; returns null at end of input or on a read error. */
    private static String nextLine() {
        synchronized (br) {
            try {
                return br.readLine();
            } catch (IOException e) {
                // A failed read is treated as end of input so the worker cannot spin forever.
                e.printStackTrace();
                return null;
            }
        }
    }

    /** Hands each URL to a fresh DownloadURLFile with target directory "Test" + separator + dir. */
    private void download(List<String> urls, String dir) throws InterruptedException {
        for (String url : urls) {
            System.out.println("===============正在下载 " + url + " ================");
            DownloadURLFile downloader = new DownloadURLFile();
            downloader.downloadFromUrl(url, "Test" + File.separator + dir);
        }
    }

    /** Reads the page at pageUrl line by line and appends every href target containing myDomain to out. */
    private void collectLinks(String pageUrl, List<String> out) {
        // Compiled once per page; it cannot be a static constant because myDomain is mutable.
        String regUrl = "(?<=(href=)[\"]?[\']?)[^\\s\"\'\\?]*(" + myDomain + ")[^\\s\"\'>]*";
        Pattern linkPattern = Pattern.compile(regUrl, Pattern.CASE_INSENSITIVE);
        BufferedReader pageReader = null;
        try {
            pageReader = new BufferedReader(new InputStreamReader(new URL(pageUrl).openStream()));
            String htmlLine;
            while ((htmlLine = pageReader.readLine()) != null) {
                Matcher m = linkPattern.matcher(htmlLine);
                while (m.find()) {
                    out.add(toAbsoluteUrl(m.group()));
                }
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (pageReader != null) {
                try {
                    pageReader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a finished read stream fails.
                }
            }
        }
    }

    /** Prefixes site-relative hrefs with the site root; hrefs that already carry http(s):// are returned unchanged. */
    private static String toAbsoluteUrl(String href) {
        boolean absolute = href.regionMatches(true, 0, "http://", 0, 7)
                || href.regionMatches(true, 0, "https://", 0, 8);
        return absolute ? href : SITE_ROOT + href;
    }
}