package main;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import connection.ConnectionUtil;
import pojo.HouseInfo;
/**
 * Downloads one listing page from {@code http://bj.lianjia.com/ershoufang/pg<page>/}
 * and extracts per-listing fields from the raw HTML with regular expressions.
 *
 * <p>Titles are parsed first and create the {@link HouseInfo} entries; every other
 * field is then attached to the entry with the same ordinal position on the page.
 * NOTE(review): this positional pairing silently misaligns if a listing lacks one
 * of the field blocks; fixing that needs per-listing parsing and is out of scope here.
 *
 * <p>All patterns rely on {@code .} not matching line terminators, so each block
 * is expected to sit on a single line of the page source.
 */
public class Analyze {

    /*
     * Why the '?' after ".+" is required in the block patterns below: a plain ".+"
     * is greedy and would run to the LAST closing tag on the line, swallowing several
     * listings at once. ".+?" is reluctant and stops at the first closing tag.
     */

    /** One listing's title block: the title div through its first closing div tag. */
    private static final Pattern TITLE_BLOCK =
            Pattern.compile("<div class=\"title\">\\s{1,}<a(.+?)</div>");

    /** Anchor text inside a title block; must start with a character in U+0391..U+FFE5. */
    private static final Pattern TITLE_TEXT =
            Pattern.compile(">[\u0391-\uFFE5]+.+?</a>");

    /** First URL-like token inside a title block (scheme is optional). */
    private static final Pattern LINK =
            Pattern.compile("((http|https)://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)?");

    /** One listing's houseInfo div. */
    private static final Pattern HOUSE_INFO_BLOCK =
            Pattern.compile("<div class=\"houseInfo\">(.+?)</div>");

    /** Text portion of a houseInfo div, from the first matching character to the div end. */
    private static final Pattern HOUSE_INFO_TEXT =
            Pattern.compile(">[\u0391-\uFFE5]+(.+?)</div>");

    /** One listing's positionInfo div, up to the first anchor that follows it. */
    private static final Pattern FLOOR_BLOCK =
            Pattern.compile("<div class=\"positionInfo\">\\s{1,}(.+?)<a");

    /** Text portion of a positionInfo block, ending at the opening of the anchor. */
    private static final Pattern FLOOR_TEXT =
            Pattern.compile(">[\u0391-\uFFE5]+(.+?)<a");

    /** One listing's totalPrice div. */
    private static final Pattern TOTAL_PRICE_BLOCK =
            Pattern.compile("<div class=\"totalPrice\">.+?</div>");

    /** The span-through-div-end portion of a totalPrice div. */
    private static final Pattern TOTAL_PRICE_TEXT =
            Pattern.compile("<span>.+</div>");

    /** One listing's unit-price span (its text starts with the literal label). */
    private static final Pattern UNIT_PRICE =
            Pattern.compile("<span>单价.+?</span>");

    /**
     * Fetches the given listing page and parses every listing found on it.
     *
     * @param page page number appended to the listing URL
     * @return the parsed listings in page order; empty (never {@code null}) when the
     *         page could not be read or contains no title blocks
     */
    public List<HouseInfo> regexMain(int page) {
        String address = "http://bj.lianjia.com/ershoufang/pg" + page + "/";
        String result = ConnectionUtil.Connect(address);
        List<HouseInfo> houseList = new ArrayList<HouseInfo>();

        // Defensive: Pattern.matcher(null) throws NullPointerException.
        // NOTE(review): whether Connect can actually return null is not visible here.
        if (result == null) {
            return houseList;
        }

        regexTitle(houseList, result);
        // The list reference is never null; what matters is whether any listing was
        // found, since the remaining extractors only fill in existing entries.
        if (!houseList.isEmpty()) {
            regexHouseInfo(houseList, result);
            regexFloorInfo(houseList, result);
            regexTotalPrice(houseList, result);
            regexUnitPrice(houseList, result);
        }
        return houseList;
    }

    /**
     * Appends one {@link HouseInfo} per title block, setting its title and link when
     * they can be extracted. An entry is added even if neither is found, so that the
     * positions of later fields still line up.
     *
     * @param houseList list that receives the new entries
     * @param targetStr raw HTML of the listing page
     */
    private void regexTitle(List<HouseInfo> houseList, String targetStr) {
        Matcher blocks = TITLE_BLOCK.matcher(targetStr);
        while (blocks.find()) {
            String block = blocks.group();
            HouseInfo info = new HouseInfo();

            // Title: strip the leading '>' and the trailing "</a>".
            Matcher title = TITLE_TEXT.matcher(block);
            if (title.find()) {
                String matched = title.group();
                info.setTitle(matched.substring(1, matched.length() - 4));
            }

            // External link: first URL-like token in the block.
            Matcher link = LINK.matcher(block);
            if (link.find()) {
                info.setLink(link.group());
            }

            houseList.add(info);
        }
    }

    /**
     * Sets the info text of each listing from the page's houseInfo divs, pairing the
     * n-th div with the n-th entry. Divs beyond the end of the list are ignored.
     *
     * @param houseList entries created by {@link #regexTitle}
     * @param targetStr raw HTML of the listing page
     */
    private void regexHouseInfo(List<HouseInfo> houseList, String targetStr) {
        Matcher blocks = HOUSE_INFO_BLOCK.matcher(targetStr);
        // The size check comes first so no match is consumed once the list is exhausted.
        for (int i = 0; i < houseList.size() && blocks.find(); i++) {
            Matcher text = HOUSE_INFO_TEXT.matcher(blocks.group());
            if (text.find()) {
                String matched = text.group();
                // Strip the leading '>' and trailing "</div>", then drop inner "</a>" tags.
                String info = matched.substring(1, matched.length() - 6)
                        .replace("</a>", "");
                houseList.get(i).setInfo(info);
            }
        }
    }

    /**
     * Sets the floor text of each listing from the page's positionInfo blocks, pairing
     * the n-th block with the n-th entry. Blocks beyond the end of the list are ignored.
     *
     * @param houseList entries created by {@link #regexTitle}
     * @param targetStr raw HTML of the listing page
     */
    private void regexFloorInfo(List<HouseInfo> houseList, String targetStr) {
        Matcher blocks = FLOOR_BLOCK.matcher(targetStr);
        for (int i = 0; i < houseList.size() && blocks.find(); i++) {
            Matcher text = FLOOR_TEXT.matcher(blocks.group());
            if (text.find()) {
                String matched = text.group();
                // Strip the leading '>' and the last five characters: the trailing "<a"
                // plus the three characters before it (presumably a separator such as
                // " - " -- confirm against the page markup). The shortest possible
                // match is five characters long, so clamp the end index to avoid a
                // StringIndexOutOfBoundsException.
                int end = Math.max(1, matched.length() - 5);
                houseList.get(i).setFloor(matched.substring(1, end));
            }
        }
    }

    /**
     * Sets the total price of each listing from the page's totalPrice divs, pairing the
     * n-th div with the n-th entry. Divs beyond the end of the list are ignored.
     *
     * @param houseList entries created by {@link #regexTitle}
     * @param targetStr raw HTML of the listing page
     */
    private void regexTotalPrice(List<HouseInfo> houseList, String targetStr) {
        Matcher blocks = TOTAL_PRICE_BLOCK.matcher(targetStr);
        for (int i = 0; i < houseList.size() && blocks.find(); i++) {
            Matcher text = TOTAL_PRICE_TEXT.matcher(blocks.group());
            if (text.find()) {
                String matched = text.group();
                // Strip the leading "<span>" and trailing "</div>", then remove the
                // closing "</span>" so the number and the text after it are joined.
                String totalPrice = matched.substring(6, matched.length() - 6)
                        .replace("</span>", "");
                houseList.get(i).setTotalPrice(totalPrice);
            }
        }
    }

    /**
     * Sets the unit price of each listing from the page's unit-price spans, pairing the
     * n-th span with the n-th entry. Spans beyond the end of the list are ignored.
     *
     * @param houseList entries created by {@link #regexTitle}
     * @param targetStr raw HTML of the listing page
     */
    private void regexUnitPrice(List<HouseInfo> houseList, String targetStr) {
        Matcher spans = UNIT_PRICE.matcher(targetStr);
        for (int i = 0; i < houseList.size() && spans.find(); i++) {
            String matched = spans.group();
            // Strip the leading "<span>" and trailing "</span>"; the label text at the
            // start of the span content is kept as part of the value.
            String unitPrice = matched.substring(6, matched.length() - 7);
            houseList.get(i).setUnitPrice(unitPrice);
        }
    }
}
- 1
- 2
- 3
- 4
前往页