package com.scala.power.nekohtml;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Calendar;
import java.util.Properties;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
/**
 * Converts an HTML resource (an HTTP URL or a local file) into an XML file by
 * parsing it with the NekoHTML {@code DOMParser} and serializing the resulting
 * DOM through a JAXP {@code Transformer}.
 *
 * <p>Note carried over from the original author: if the page being converted
 * is a JSP page, its html tag must be written as
 * {@code <html xmlns="http://www.w3.org/1999/xhtml">}. Otherwise the tags in
 * the generated XML are upper case and the html element lacks that namespace
 * declaration, in which case DocBook cannot produce the format object.
 *
 * @author scala.power
 */
public class NekohtmlTestFetures {

    /**
     * Demo entry point: fetches a fixed URL and writes the converted XML to a
     * timestamped file under {@code xmlOut/}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // http://localhost:8080/urlTest/index.jsp
        String path = "http://www.baidu.com";
        String outputfile;
        try {
            outputfile = getFileName();
        } catch (Exception e) {
            // Without an output file name there is nothing to convert into;
            // continuing would only fail later with a NullPointerException.
            e.printStackTrace();
            return;
        }
        NekohtmlTestFetures test = new NekohtmlTestFetures();
        try {
            test.resourceURL2XMLConverter(path, outputfile);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads the HTML at {@code url} and writes it as XML to
     * {@code outPutPath}.
     *
     * @param url        HTTP(S) address of the page to convert
     * @param outPutPath path of the XML file to create (an existing file is
     *                   replaced)
     * @return {@code true} if an XML file was written; {@code false} if
     *         {@code url} is {@code null} or blank, in which case nothing is
     *         written
     * @throws Exception if fetching, parsing or writing fails
     */
    public boolean resourceURL2XMLConverter(String url, String outPutPath)
            throws Exception {
        return converter(url, outPutPath, false);
    }

    /**
     * Reads the HTML file at {@code inPutPath} and writes it as XML to
     * {@code outPutPath}.
     *
     * @param inPutPath  path of the local HTML file to convert
     * @param outPutPath path of the XML file to create (an existing file is
     *                   replaced)
     * @return {@code true} if an XML file was written; {@code false} if
     *         {@code inPutPath} is {@code null} or blank, in which case
     *         nothing is written
     * @throws Exception if reading, parsing or writing fails
     */
    public boolean resourceFILE2XMLConverter(String inPutPath, String outPutPath)
            throws Exception {
        return converter(inPutPath, outPutPath, true);
    }

    /**
     * Shared implementation of the two public converters: parses the source,
     * then serializes the DOM to the output file.
     *
     * @return {@code true} if the file was written, {@code false} if the
     *         source location was blank and no document could be parsed
     */
    private boolean converter(String url, String outPutPath, boolean isFromfile)
            throws Exception {
        if (outPutPath == null) {
            // Fail before doing any network or disk work.
            throw new NullPointerException("outPutPath must not be null");
        }
        Document document = getSourceNode(url, isFromfile);
        if (document == null) {
            // Blank source location: do not write an empty file and claim success.
            return false;
        }
        File file = new File(outPutPath);
        if (file.exists()) {
            // Best effort only; opening the output stream truncates the file anyway.
            file.delete();
        }
        genXmlFile(document, file);
        System.out.println("generate " + file.getCanonicalPath()
                + " successfully!");
        return true;
    }

    /**
     * Serializes a DOM node to {@code file} as UTF-8 XML without an XML
     * declaration.
     *
     * @param output the node to serialize
     * @param file   the file to write
     * @throws Exception if the transformer cannot be created, the file cannot
     *                   be opened, or the transformation fails
     */
    public void genXmlFile(Node output, File file) throws Exception {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        Properties props = new Properties();
        props.setProperty("encoding", "UTF-8");
        props.setProperty("method", "xml");
        props.setProperty("omit-xml-declaration", "yes");
        transformer.setOutputProperties(props);

        java.io.FileOutputStream fos = new java.io.FileOutputStream(file);
        try {
            transformer.transform(new DOMSource(output), new StreamResult(fos));
        } finally {
            // Release the file handle even when the transformation fails.
            fos.close();
        }
    }

    /**
     * Parses HTML from a local file or an HTTP URL into a DOM document.
     *
     * <p>URL content is decoded as UTF-8. File content is read through a
     * {@code FileReader}, i.e. with the JVM's default charset.
     *
     * @param path     file path or URL of the HTML source
     * @param fromfile {@code true} to treat {@code path} as a local file,
     *                 {@code false} to treat it as an HTTP URL
     * @return the parsed document, or {@code null} if {@code path} is
     *         {@code null} or blank
     * @throws Exception if the source cannot be opened or parsed
     */
    public Document getSourceNode(String path, boolean fromfile)
            throws Exception {
        if (path == null || path.trim().equals("")) {
            return null;
        }

        DOMParser parser = new DOMParser();
        // whether to add missing tags
        parser.setFeature("http://cyberneko.org/html/features/balance-tags",
                true);
        parser.setFeature("http://cyberneko.org/html/features/augmentations",
                true);
        parser.setFeature("http://xml.org/sax/features/namespaces", false);
        // whether to ignore data that follows the document root element
        parser.setFeature(
                "http://cyberneko.org/html/features/balance-tags/ignore-outside-content",
                true);
        // whether to balance tags when parsing HTML (original author's note)
        parser.setFeature(
                "http://cyberneko.org/html/features/balance-tags/document-fragment",
                true);
        // element names in lower case
        parser.setProperty("http://cyberneko.org/html/properties/names/elems",
                "lower");
        // attribute names in lower case
        parser.setProperty("http://cyberneko.org/html/properties/names/attrs",
                "lower");
        parser.setProperty(
                "http://cyberneko.org/html/properties/namespaces-uri",
                "http://www.w3.org/1999/xhtml");

        if (fromfile) {
            // convert from a file
            FileReader reader = new FileReader(new File(path));
            try {
                parser.parse(new InputSource(reader));
            } finally {
                reader.close();
            }
        } else {
            // convert from a URL
            HttpURLConnection con = (HttpURLConnection) new URL(path)
                    .openConnection();
            try {
                InputStream in = con.getInputStream();
                try {
                    parser.parse(new InputSource(
                            new InputStreamReader(in, "UTF-8")));
                } finally {
                    in.close();
                }
            } finally {
                con.disconnect();
            }
        }
        return parser.getDocument();
    }

    /**
     * Builds a timestamped output file name of the form
     * {@code xmlOut/tmp<yyyyMMddHHmmssSSS>.xml}, creating the {@code xmlOut}
     * directory if it does not exist yet. The absolute path of the directory
     * is printed to standard output.
     *
     * @return the relative path of the output file
     * @throws Exception if the output directory cannot be created
     */
    public static String getFileName() throws Exception {
        String dir = "xmlOut/";
        File dirFile = new File(dir);
        // The trailing isDirectory() check tolerates a concurrent creation
        // that makes mkdirs() return false.
        if (!dirFile.exists() && !dirFile.mkdirs() && !dirFile.isDirectory()) {
            throw new IOException("Cannot create output directory "
                    + dirFile.getAbsolutePath());
        }
        System.out.println(dirFile.getAbsolutePath());

        Calendar now = Calendar.getInstance();
        // Fixed locale keeps the digits ASCII regardless of the default locale.
        String stamp = String.format(java.util.Locale.ENGLISH,
                "%1$tY%1$tm%1$td%1$tH%1$tM%1$tS%1$tL", now);
        return dir + "tmp" + stamp + ".xml";
    }
}
没有合适的资源?快使用搜索试试~ 我知道了~
flyingsaucer转html为PDF(中文可用)
共43个文件
jar:16个
svn-base:8个
java:3个
5星 · 超过95%的资源 需积分: 46 134 下载量 106 浏览量
2011-12-12
16:07:25
上传
评论 3
收藏 10.32MB RAR 举报
温馨提示
flyingsaucer转html为PDF(中文可用),不需要更改源码,可以直接应用到中文!
资源推荐
资源详情
资源评论
收起资源包目录
flyingsaucer.rar (43个子文件)
flyingsaucer2
.project 576B
xml
100bottles.jpg 24KB
weather.css 496B
alice
alice3.gif 23KB
.svn
tmp
props
prop-base
text-base
props
all-wcprops 704B
entries 840B
prop-base
alice3.gif.svn-base 53B
alice4.gif.svn-base 53B
alice2.gif.svn-base 53B
text-base
alice.xhtml.svn-base 13KB
alice3.gif.svn-base 23KB
alice.css.svn-base 2KB
alice4.gif.svn-base 29KB
alice2.gif.svn-base 31KB
alice.css.netbeans-base 2KB
format 2B
alice.xhtml 13KB
alice.css 2KB
alice2.gif 31KB
alice4.gif 29KB
weather.xml 557B
sample.xml 361B
src
com
scala
power
flying
FirstDoc.java 5KB
listener
MetaDataCreationListener.java 3KB
nekohtml
NekohtmlTestFetures.java 5KB
lib
serializer-2.7.0.jar 185KB
commons-io-1.3.1.jar 82KB
batik-all-1.7.jar 3.16MB
xml-apis-xerces-2.9.1.jar 190KB
xercesImpl-2.7.1.jar 1.15MB
core-renderer.jar 1.03MB
xml-apis-1.3.04.jar 190KB
xml-apis-ext-1.3.04.jar 84KB
jdom-1.1.2.jar 149KB
xmlgraphics-commons-1.4.jar 556KB
iText-2.0.8.jar 1.16MB
xalan-2.7.0.jar 2.94MB
avalon-framework-4.2.0.jar 80KB
commons-logging-1.0.4.jar 37KB
iTextAsian.jar 324KB
nekohtml.jar 146KB
xmlOut
.settings
org.eclipse.jdt.core.prefs 629B
.classpath 1KB
bin
out
共 43 条
- 1
liwanzhong86622
- 粉丝: 0
- 资源: 1
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功
- 1
- 2
- 3
- 4
前往页