package testHtml2Word;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.util.ResourceUtils;
import org.springframework.web.util.HtmlUtils;
public class HtmlToWord {
///**
//* 外部接口
//* @param htmlPath html文件的路径
//* @param cssPath css文件的路径
//* @param wordPath word文件的路径(保存本地的路径)
//* @param code 编码方式(一般都为utf-8)
//* @throws Exception
//*/
//public void htmlToWord2(String htmlPath, String cssPath, String wordPath, String code) throws Exception {
// /*InputStream bodyIs = new FileInputStream("H:\\MyTest\\Java\\test_show\\test.html");
// InputStream cssIs = new FileInputStream("H:\\MyTest\\Java\\test_show\\test.css");*/
// InputStream bodyIs = new FileInputStream(htmlPath);
// //InputStream cssIs = new FileInputStream(cssPath);
// String body = this.getContent(bodyIs);
// String css = "";//this.getContent(cssIs);
// // 拼一个标准的HTML格式文档
// String content = "<html><head><style>" + css + "</style></head><body>" + body + "</body></html>";
// InputStream is = new ByteArrayInputStream(content.getBytes(code));
// OutputStream os = new FileOutputStream(wordPath);
// this.inputStreamToWord(is, os);
//}
/**
 * Writes the content of {@code is} to {@code os} as a POIFS (OLE2) container
 * holding a single entry named {@code "WordDocument"}.
 *
 * <p>Both streams are closed by this method, whether or not the write succeeds.
 * Exceptions are not handled here; they propagate to the caller.
 *
 * @param is source of the bytes stored in the "WordDocument" entry; closed on return
 * @param os destination the POIFS container is written to; closed on return
 * @throws IOException if reading the input or writing the container fails
 */
private void inputStreamToWord(InputStream is, OutputStream os) throws IOException {
    // try-with-resources guarantees that the streams and the filesystem are released
    // even when createDocument/writeFilesystem throws; the previous straight-line
    // close() calls were skipped on any failure.
    try (InputStream in = is;
            OutputStream out = os;
            POIFSFileSystem fs = new POIFSFileSystem()) {
        // Corresponds to org.apache.poi.hdf.extractor.WordDocument
        fs.createDocument(in, "WordDocument");
        fs.writeFilesystem(out);
    }
}
/**
 * Reads each given stream as UTF-8 text and returns the concatenated result.
 *
 * <p>Streams are consumed in argument order. Each stream is read line by line and
 * the lines are appended back to back, so line terminators are NOT preserved in
 * the returned text. Every non-null stream is closed once it has been read;
 * {@code null} elements are skipped.
 *
 * @param ises streams to read; the array itself may be {@code null}
 * @return the concatenated text of all streams (empty when no stream is given),
 *         or {@code null} when {@code ises} is {@code null}
 * @throws IOException if reading or closing any stream fails
 */
private String getContent(InputStream... ises) throws IOException {
    if (ises == null) {
        return null;
    }
    StringBuilder result = new StringBuilder();
    for (InputStream is : ises) {
        if (is == null) {
            // Nothing to read; avoids the NullPointerException the reader would throw.
            continue;
        }
        // The stream is listed as its own resource so it is closed even if
        // constructing the reader fails.
        try (InputStream in = is;
                BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
            String line;
            while ((line = br.readLine()) != null) {
                result.append(line);
            }
        }
    }
    return result.toString();
}
// public static String docToHtml() throws Exception {
// File path = new File(ResourceUtils.getURL("classpath:").getPath());
// String imagePathStr = path.getAbsolutePath() + "\\static\\image\\";
// String sourceFileName = path.getAbsolutePath() + "\\static\\test.doc";
// String targetFileName = path.getAbsolutePath() + "\\static\\test2.html";
// File file = new File(imagePathStr);
// if(!file.exists()) {
// file.mkdirs();
// }
// HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(sourceFileName));
// org.w3c.dom.Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
// WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
// //保存图片,并返回图片的相对路径
// wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
// try (FileOutputStream out = new FileOutputStream(imagePathStr + name)) {
// out.write(content);
// } catch (Exception e) {
// e.printStackTrace();
// }
// return "image/" + name;
// });
// wordToHtmlConverter.processDocument(wordDocument);
// org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument();
// DOMSource domSource = new DOMSource(htmlDocument);
// StreamResult streamResult = new StreamResult(new File(targetFileName));
// TransformerFactory tf = TransformerFactory.newInstance();
// Transformer serializer = tf.newTransformer();
// serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
// serializer.setOutputProperty(OutputKeys.INDENT, "yes");
// serializer.setOutputProperty(OutputKeys.METHOD, "html");
// serializer.transform(domSource, streamResult);
// return targetFileName;
// }
// public static String docxToHtml() throws Exception {
// File path = new File(ResourceUtils.getURL("classpath:").getPath());
// String imagePath = path.getAbsolutePath() + "\\static\\image";
// String sourceFileName = path.getAbsolutePath() + "\\static\\test.docx";
// String targetFileName = path.getAbsolutePath() + "\\static\\test.html";
//
// OutputStreamWriter outputStreamWriter = null;
// try {
// XWPFDocument document = new XWPFDocument(new FileInputStream(sourceFileName));
// XHTMLOptions options = XHTMLOptions.create();
// // 存放图片的文件夹
// options.setExtractor(new FileImageExtractor(new File(imagePath)));
// // html中图片的路径
// options.URIResolver(new BasicURIResolver("image"));
// outputStreamWriter = new OutputStreamWriter(new FileOutputStream(targetFileName), "utf-8");
// XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
// xhtmlConverter.convert(document, outputStreamWriter, options);
// } finally {
// if (outputStreamWriter != null) {
// outputStreamWriter.close();
// }
// }
// return targetFileName;
// }
/**
 * Reads the file at {@code filePath} and returns its content decoded as UTF-8.
 *
 * <p>This method is best-effort: I/O failures are reported with
 * {@code printStackTrace()} rather than thrown. If the file cannot be opened the
 * result is an empty string; if reading fails part-way, the text decoded so far
 * is returned.
 *
 * @param filePath path of the file to read
 * @return the decoded file content, possibly empty or partial after an I/O failure
 */
public static String readfile(String filePath) {
    StringBuilder text = new StringBuilder();
    // Decoding goes through one reader for the whole file so that a multi-byte
    // UTF-8 sequence split across two read() calls is still decoded correctly;
    // decoding each byte chunk on its own would corrupt such characters.
    try (InputStream input = new FileInputStream(new File(filePath));
            InputStreamReader reader = new InputStreamReader(input, "UTF-8")) {
        char[] chunk = new char[1024];
        for (int n; (n = reader.read(chunk)) != -1;) {
            text.append(chunk, 0, n);
        }
    } catch (IOException e) {
        // Covers FileNotFoundException as well: report and fall through with
        // whatever has been read instead of dereferencing a null stream.
        e.printStackTrace();
    }
    return text.toString();
}
public static String writeWordFile(String content) {
String path = "D:/wordFile";
Map<String, Object> param = new HashMap<String, Object>();
if (!"".equals(path)) {
File fileDir = new File(path);
if (!fileDir.exists()) {
fileDir.mkdirs();
}
content = HtmlUtils.htmlUnescape(content);
List<HashMap<String, String>> imgs = getImgStr(content);
int count = 0;
for (HashMap<String, String> img : imgs) {
count++;
//处理替换以“/>”结尾的img标签
content = content.replace(img.get("img"), "${imgReplace" + count + "}");
//处理替换以“>”结尾的img标签
content = content.replace(img.get("img1"), "${imgReplace" + count + "}");
Map<String, Object> header = new HashMap<String, Object>();
String imagePath = img.get("src") ;
InputStream inputStream = null;
HttpURLConnection httpURLConnection = null;
try {
URL url = new URL(imagePath);
httpURLConnection = (HttpURLConnection) url.openConnection();// 设置网络连接超时时间
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
testHtml2Word.zip (41个子文件)
777.html 4KB
.settings
org.eclipse.wst.jsdt.ui.superType.name 6B
org.eclipse.wst.common.project.facet.core.xml 414B
org.eclipse.wst.common.component 498B
org.eclipse.wst.jsdt.ui.superType.container 49B
.jsdtscope 564B
org.eclipse.jdt.core.prefs 364B
src
testHtml2Word
HtmlToWord.java 12KB
OfficeUtil.java 7KB
CustomXWPFDocument.java 4KB
JacobUtil.java 6KB
.project 1KB
jacob-1.18.zip 473KB
WebRoot
META-INF
MANIFEST.MF 39B
WEB-INF
classes
testHtml2Word
CustomXWPFDocument.class 5KB
JacobUtil.class 4KB
OfficeUtil.class 7KB
HtmlToWord.class 8KB
lib
org.apache.poi.xwpf.converter.core-1.0.1.jar 150KB
poi-scratchpad-3.14.jar 1.26MB
xmlbeans-2.6.0.jar 2.6MB
commons-codec-1.10.jar 278KB
poi-3.14.jar 2.41MB
commons-collections4-4.1.jar 734KB
spring-core-3.2.6.RELEASE.jar 850KB
poi-ooxml-3.17.jar 1.41MB
poi-3.17.jar 2.58MB
jsoup-1.11.3.jar 386KB
ooxml-schemas-1.3.jar 14.84MB
poi-ooxml-3.14.jar 1.23MB
poi-ooxml-schemas-3.17.jar 5.65MB
jacob.jar 48KB
poi-ooxml-schemas-3.14.jar 5.65MB
xdocreport-1.0.6.jar 1.09MB
spring-web-3.2.6.RELEASE.jar 613KB
org.apache.poi.xwpf.converter.xhtml-1.0.0.jar 49KB
curvesapi-1.03.jar 90KB
poi-scratchpad-3.17.jar 1.33MB
stax-api-1.0.1.jar 26KB
index.jsp 834B
.classpath 1KB
共 41 条
- 1
xiaoshunshuang
- 粉丝: 7
- 资源: 18
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
- 基于STM8S103F3P6+STM8S207C8T6+STM32F103 单片机三合一最小系统开发板硬件(原理图+PCB)工程
- 基于C语言实现的打印杨辉三角
- 基于ASIO的插件式服务器,支持TCP,UDP,串口,Http,Websocket统一化的数据接口,隔离开发人员和IO之间的操作
- stm32 usb接口通信
- Chessmate是一款完全免费的国际象棋学习软件,支持引擎分析,学开局、残局、棋书解读、大数据分析等功能
- 总结整理的Android面试Java基础知识点面试资料精编汇总文档资料合集.zip
- .android_lq
- FDN5632N-VB一款SOT23封装N-Channel场效应MOS管
- 毛老板-2404250902.amr
- Java类加载流程(双亲委派)流程图.zip
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功
- 1
- 2
前往页