html转Word(可批量转化但速度较慢,支持图片显示,图片在本地或网络都行)

共35个文件

jar：21个

class：4个

java：4个

html转Word

1星需积分: 49 12 浏览量 2019-05-08 15:53:13 上传评论 3 收藏 34.75MB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

test.zip （35个子文件）

test

bin

test

CustomXWPFDocument.class 5KB

JacobUtil.class 3KB

OfficeUtil.class 7KB

HtmlToWord.class 9KB

.settings

org.eclipse.core.resources.prefs 57B

org.eclipse.jdt.core.prefs 598B

src

test

HtmlToWord.java 9KB

OfficeUtil.java 7KB

CustomXWPFDocument.java 4KB

JacobUtil.java 4KB

.project 380B

WebRoot

META-INF

MANIFEST.MF 39B

WEB-INF

classes

lib

org.apache.poi.xwpf.converter.core-1.0.1.jar 150KB

poi-scratchpad-3.14.jar 1.26MB

xmlbeans-2.6.0.jar 2.6MB

commons-codec-1.10.jar 278KB

poi-3.14.jar 2.41MB

commons-collections4-4.1.jar 734KB

spring-core-3.2.6.RELEASE.jar 850KB

poi-ooxml-3.17.jar 1.41MB

poi-3.17.jar 2.58MB

jsoup-1.11.3.jar 386KB

ooxml-schemas-1.3.jar 14.84MB

poi-ooxml-3.14.jar 1.23MB

poi-ooxml-schemas-3.17.jar 5.65MB

jacob.jar 48KB

poi-ooxml-schemas-3.14.jar 5.65MB

xdocreport-1.0.6.jar 1.09MB

spring-web-3.2.6.RELEASE.jar 613KB

org.apache.poi.xwpf.converter.xhtml-1.0.0.jar 49KB

curvesapi-1.03.jar 90KB

poi-scratchpad-3.17.jar 1.33MB

stax-api-1.0.1.jar 26KB

index.jsp 834B

.classpath 2KB

package test; import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DocumentEntry; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.springframework.web.util.HtmlUtils; import java.io.*; import java.net.HttpURLConnection; import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; public class HtmlToWord { static String savePath = "D:/Confluence";//文件保存位置 static String scanPath = "C:/Users/Administrator/Desktop/Confluence-space-export-094133-228.html/service/";//要扫描的文件所在文件夹 /** * 把输入流里面的内容以UTF-8编码当文本取出。不考虑异常，直接抛出 * * @param ises * @return * @throws IOException */ private String getContent(InputStream... ises) throws IOException { if (ises != null) { StringBuilder result = new StringBuilder(); BufferedReader br; String line; for (InputStream is : ises) { br = new BufferedReader(new InputStreamReader(is, "UTF-8")); while ((line = br.readLine()) != null) { result.append(line); } } return result.toString(); } return null; } public static String writeWordFile(String content) { String path = savePath; Map<String, Object> param = new HashMap<String, Object>(); if (!"".equals(path)) { File fileDir = new File(path); if (!fileDir.exists()) { fileDir.mkdirs(); } content = HtmlUtils.htmlUnescape(content); HashMap<String, Object> imgsornm = getImgStr(content); List<HashMap<String, String>> imgs = (List<HashMap<String, String>>)imgsornm.get("list"); int count = 0; for (HashMap<String, String> img : imgs) { count++; //处理替换以“/>”结尾的img标签 content = content.replace(img.get("img"), "${imgReplace" + count + "}"); //处理替换以“>”结尾的img标签 content = content.replace(img.get("img1"), "${imgReplace" + count + "}"); Map<String, Object> header = new HashMap<String, Object>(); String imagePath = 
img.get("src"); String[] split = imagePath.split(":"); InputStream inputStream = null; if(split != null && split.length >3){ HttpURLConnection httpURLConnection = null; try { URL url = new URL(imagePath); httpURLConnection = (HttpURLConnection) url.openConnection();// 设置网络连接超时时间 httpURLConnection.setConnectTimeout(3000);// 设置应用程序要从网络连接读取数据 httpURLConnection.setDoInput(true); httpURLConnection.setRequestMethod("GET"); int responseCode = httpURLConnection.getResponseCode(); if (responseCode == 200) {// 从服务器返回一个输入流 inputStream = httpURLConnection.getInputStream(); } } catch (Exception e) { e.printStackTrace(); } } else { try { String imageispath = "C:/Users/Administrator/Desktop/Confluence-space-export-094133-228.html/service/" + imagePath; inputStream = new FileInputStream(imageispath); } catch (FileNotFoundException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } try { //如果没有宽高属性，默认设置为400*300 if (img.get("width") == null || img.get("height") == null) { header.put("width", 400); header.put("height", 300); } else { header.put("width", (int) (Double.parseDouble(img.get("width")))); header.put("height", (int) (Double.parseDouble(img.get("height")))); } header.put("type", "jpg"); header.put("content", OfficeUtil.inputStream2ByteArray(inputStream, true)); } catch (Exception e) { e.printStackTrace(); } param.put("${imgReplace" + count + "}", header); } try { // 生成doc格式的word文档，需要手动改为docx byte by[] = content.getBytes("UTF-8"); ByteArrayInputStream bais = new ByteArrayInputStream(by); POIFSFileSystem poifs = new POIFSFileSystem(); DirectoryEntry directory = poifs.getRoot(); DocumentEntry documentEntry = directory.createDocument("WordDocument", bais); FileOutputStream ostream = new FileOutputStream(savePath + "/temp.doc"); poifs.writeFilesystem(ostream); bais.close(); ostream.close(); JacobUtil.wordConveter(savePath + "/temp.doc"); // 临时文件（手动改好的docx文件） CustomXWPFDocument doc = OfficeUtil.generateWord(param, savePath + "/temp.docx"); //最终生成的带图片的word文件 
System.out.println(); //处理h1标签作为文件名時的特殊字符 String namestr = (String)imgsornm.get("title"); String nm = RegExString(namestr);//去除文件名的特殊字符 FileOutputStream fopts = new FileOutputStream(savePath + "/"+ nm +".docx"); doc.write(fopts); fopts.close(); } catch (Exception e) { e.printStackTrace(); } } return "save is OK"; } //获取html中的图片元素信息 public static HashMap<String, Object> getImgStr(String htmlStr) { HashMap<String, Object> rnmap = new HashMap<String, Object>(); List<HashMap<String, String>> pics = new ArrayList<HashMap<String, String>>(); Document doc = Jsoup.parse(htmlStr); Elements imgs = doc.select("img"); //读取html中h1标签用来作为文件名开始------- Elements title = doc.select("h1"); String text = title.text(); HashMap<String, String> mapnm = new HashMap<String, String>(); rnmap.put("title", text); //读取html中h1标签用来作为文件名结束------- for (Element img : imgs) { System.out.println(img.attr("width").length()); HashMap<String, String> map = new HashMap<String, String>(); if (!"".equals(img.attr("width")) /*&& img.attr("width").length() >= 2*/) { map.put("width", img.attr("width").substring(0, img.attr("width").length())); } if (!"".equals(img.attr("height")) /*&& img.attr("height").length() >= 2*/) { map.put("height", img.attr("height").substring(0, img.attr("height").length())); } map.put("img", img.toString().substring(0, img.toString().length() - 1) + "/>"); map.put("img1", img.toString()); map.put("src", img.attr("src")); pics.add(map); }

评论收藏

内容反馈