package com.trace.arinside;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import junit.framework.TestCase;
/**
 * Scrapes the field table of an ARInside schema page and appends it, one field
 * per record, to {@code ARInsideFetchContents.csv} in the working directory.
 *
 * <p>The page is read line by line. A line that starts with a link into
 * {@code ../../schema/129/} opens a new record; following {@code <td>} lines
 * add further columns. When a record is complete, the field's own detail page
 * is fetched and the names of the filters listed there are appended.
 *
 * <p>All state is kept in static fields, so the class is not safe for
 * concurrent use. Values are written to the CSV without quoting or escaping.
 */
public class testContentFetcher extends TestCase {
    // http://tracews01.phx.ebay.com/arinside/schema/129/index.htm

    /** Prefix that the relative links captured by {@link #FIELD_LINK} are appended to. */
    private static final String SCHEMA_BASE_URL = "http://tracews01.phx.ebay.com/arinside/schema/129/";

    /** A table cell; group 1 is the cell content. */
    private static final Pattern TABLE_CELL = Pattern.compile("<td>(.*?)</td>");

    /** The closing tag of a table. */
    private static final Pattern TABLE_END = Pattern.compile("</table>");

    /** First cell of a field row; group 1 is the detail page, group 2 the field name. */
    private static final Pattern FIELD_LINK =
            Pattern.compile("<td><a href=\"../../schema/129/(.*?)\">(.*?)</a>");

    /** A cell content consisting of a single link; group 2 is the link text. */
    private static final Pattern ANCHOR = Pattern.compile("<a href=\"(.*?)\">(.*?)</a>");

    /** A filter entry on a field detail page; group 2 is the filter name. */
    private static final Pattern FILTER_LINK = Pattern.compile(
            "<td><img src=\"../../img/filter.gif\" width=\"16\" height=\"16\" alt=\"filter.gif\"><a href=\"(.*?)\">(.*?)</a>(.*?)</td>");

    /** Connection opened by the most recent {@link #connect}; null if that call failed. */
    private static URLConnection connection;

    /** Header line written once at the top of a new CSV file. */
    private static String tableHeadString = "FieldName,FieldID,DataType,In Views,ModifiedTime,Owner,FiltersInfoList\n";

    /** State of {@link #regExpParser}: 0 = nothing seen, 1 = head, 2 = tail, 3 = body. */
    private static int flag = 0;

    /** Detail page URL of the field row most recently recognised by {@link #parseForTR}. */
    private static String subURLString = "";

    /**
     * Opens a connection to {@code urlString} and stores it in {@link #connection}.
     *
     * <p>Failures are printed and leave {@link #connection} set to null, so that a
     * caller can never read the stream of a previously opened page by mistake.
     *
     * @param urlString absolute URL of the page to open
     */
    private static void connect(String urlString) {
        // Reset first: a stale connection from an earlier call must not survive a failure.
        connection = null;
        try {
            connection = new URL(urlString).openConnection();
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Classifies a line and records the classification in {@link #flag}.
     *
     * <p>Nothing in this class calls this method. An earlier draft overwrote
     * {@code inputLine} with a debug literal and never applied the tail pattern;
     * both are corrected here so the method acts on its argument.
     *
     * @param inputLine one line of the page
     * @return 1 if the whole line is a single table cell, 2 if it is a closing
     *         table tag, 3 if the previous state was 1, otherwise the unchanged state
     */
    private static int regExpParser(String inputLine) {
        if (TABLE_CELL.matcher(inputLine).matches()) {
            flag = 1; // means the head
            System.out.print("ok");
            return flag;
        }
        if (TABLE_END.matcher(inputLine).matches()) {
            flag = 2; // means the tail
            return flag;
        }
        if (flag == 1) {
            flag = 3; // means the body
        }
        return flag;
    }

    /**
     * Fetches a field detail page and collects the filter names found on it.
     *
     * <p>Note that this replaces {@link #connection}; a reader that was already
     * created from the previous connection keeps working.
     *
     * @param input absolute URL of the field detail page
     * @return the concatenated results of {@link #parseForFilter} for every line,
     *         or what was collected so far (possibly empty) if the page could not be read
     */
    private static String subFilterInfoHandler(String input) {
        String result = "";
        connect(input);
        if (connection == null) {
            return result; // connect() has already reported the problem
        }
        BufferedReader in = null;
        try {
            in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                result += parseForFilter(inputLine);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(in);
        }
        return result;
    }

    /**
     * Reads the page behind {@link #connection} and writes one CSV record per field row.
     *
     * <p>Lines before the first field row are skipped. After that, reading stops at
     * the first line that is a closing table tag or at the end of the stream, and
     * the record still being assembled is written out as well.
     */
    private static void readContents() {
        if (connection == null) {
            return; // connect() has already reported the problem
        }
        BufferedReader in = null;
        try {
            in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
            String row = "";     // CSV text of the record being assembled; empty until the first row
            String rowUrl = "";  // detail page belonging to that record
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                boolean rowOpen = !row.equals("");
                // The table end only counts once the first field row has been seen.
                if (rowOpen && parseForTail(inputLine)) {
                    break;
                }
                String rowStart = parseForTR(inputLine);
                if (!rowStart.equals("")) {
                    if (rowOpen) {
                        flushRow(row, rowUrl);
                    }
                    row = rowStart;
                    rowUrl = subURLString; // just set by parseForTR for the new row
                    continue;
                }
                if (rowOpen) {
                    row += parseForTD(inputLine);
                }
            }
            // The final record has no following row to trigger its flush.
            if (!row.equals("")) {
                flushRow(row, rowUrl);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(in);
        }
    }

    /** Appends the filter list of {@code detailUrl} to {@code row} and writes the record. */
    private static void flushRow(String row, String detailUrl) {
        writeContents(row + subFilterInfoHandler(detailUrl) + "\n");
    }

    /** Closes {@code reader} if it is not null; a failure to close is only printed. */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Unfinished placeholder that nothing in this class calls.
     *
     * <p>An earlier draft split the content at {@code <tr>} and at line breaks but
     * discarded the pieces, and failed with a {@code ClassCastException} before
     * doing so. The dead code was removed; the result is unchanged.
     *
     * @param fetchRawContent raw page content (currently ignored)
     * @return always null
     */
    private static String parseResult(String fetchRawContent) {
        return null;
    }

    /**
     * Tells whether a line is exactly a closing table tag.
     *
     * @param inputLine one line of the page
     * @return true if the whole line equals {@code </table>}
     */
    private static boolean parseForTail(String inputLine) {
        return TABLE_END.matcher(inputLine).matches();
    }

    /**
     * Recognises the first cell of a field row.
     *
     * <p>On a match, {@link #subURLString} is set to the field's detail page.
     *
     * @param input one line of the page
     * @return the field name followed by a comma, or an empty string if the line
     *         does not start with a link into the schema directory
     */
    private static String parseForTR(String input) {
        Matcher m = FIELD_LINK.matcher(input);
        if (!m.lookingAt()) {
            return "";
        }
        subURLString = SCHEMA_BASE_URL + m.group(1);
        return m.group(2) + ",";
    }

    /**
     * Extracts the text of a table cell at the start of a line.
     *
     * <p>If the cell content is a single link, the link text is used. Non-breaking
     * spaces, both the {@code &nbsp;} entity and the U+00A0 character, are turned
     * into plain spaces. NOTE(review): the earlier code replaced a space with a
     * space, which does nothing; presumably the entity was lost in transit, so
     * confirm this is the intended normalisation.
     *
     * @param input one line of the page
     * @return the cell text followed by a comma, or an empty string if the line
     *         does not start with a complete {@code <td>} cell
     */
    private static String parseForTD(String input) {
        Matcher cell = TABLE_CELL.matcher(input);
        if (!cell.lookingAt()) {
            return "";
        }
        String result = cell.group(1);
        Matcher link = ANCHOR.matcher(result);
        if (link.matches()) {
            result = link.group(2);
        }
        result = result.replaceAll("&nbsp;", " ").replace('\u00a0', ' ');
        return result + ",";
    }

    /**
     * Recognises a filter entry on a field detail page.
     *
     * <p>The result, empty or not, is echoed to standard output for every line.
     *
     * @param input one line of the detail page
     * @return the filter name followed by a carriage return and six commas, or an
     *         empty string if the line does not start with a filter entry
     */
    private static String parseForFilter(String input) {
        Matcher m = FILTER_LINK.matcher(input);
        String result = "";
        if (m.lookingAt()) {
            result = m.group(2) + "\r" + ",,,,,,";
        }
        System.out.print("filter:" + result + "\r");
        return result;
    }

    /**
     * Appends text to {@code ARInsideFetchContents.csv}.
     *
     * <p>If the file is missing or empty, {@link #tableHeadString} is written
     * first. Failures are printed and otherwise ignored.
     *
     * @param outputLine text to append, including its own line terminator
     */
    private static void writeContents(String outputLine) {
        File file = new File("ARInsideFetchContents.csv");
        FileWriter writer = null;
        try {
            boolean needsHeader = !file.exists() || file.length() == 0;
            writer = new FileWriter(file, true);
            if (needsHeader) {
                writer.write(tableHeadString);
            }
            writer.write(outputLine);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SecurityException e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Fetches the schema index page and exports its field table.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String urlMain = "http://tracews01.phx.ebay.com/arinside/schema/129/index.htm";
        //String urlMain="http://localhost:8080/test/index.html";
        connect(urlMain);
        readContents();
    }
}
JAVA正则表达式小程序
需积分: 0 155 浏览量
2009-01-15
12:07:47
上传
评论
收藏 2KB RAR 举报
blueberry1228
- 粉丝: 1
- 资源: 3
最新资源
- 基于Matlab人脸肤色定理的教师人数统计+源代码+全部数据+文档说明+详细注释+使用说明+截图(高分课程设计)
- 基于Matlab霍夫曼变换的表盘读数识别+源代码+全部数据+文档说明+详细注释+使用说明+截图(高分课程设计)
- 基于Matlab火灾烟雾检测源码带GUI界面+源代码+全部数据+文档说明+详细注释+使用说明+截图(高分课程设计)
- 基于Matlab的恶劣天气交通标志识别系统+源代码+全部数据+文档说明+详细注释+使用说明+截图(高分课程设计)
- 基于MATLAB的霍夫曼变换的表盘示数识别+源代码+全部数据+文档说明+详细注释+使用说明+截图(高分课程设计)
- 基于Matlab的车道线识别系统 +源代码+全部数据+文档说明+详细注释+使用说明+截图(高分课程设计)
- 基于MATLAB的教室人数统计系统带Gui界面+源代码+全部数据+文档说明+详细注释+使用说明+截图(高分课程设计)
- 基于MATLAB的教室人数统计系统带Gui界面+源代码+全部数据+文档说明+详细注释+使用说明+截图(高分课程设计)
- 基于MATLAB 的霍夫曼变换答题卡识别源码+全部数据+文档说明+详细注释+使用说明+截图(高分课程设计)
- 基于Matlab+bp神经网络的神经网络汉字识别系统+源代码+全部数据+文档说明+详细注释+使用说明+截图(高分课程设计)
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
评论0