package com.enterise.web.htmlgen.doc;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import com.enterise.web.htmlgen.HtmlGenerator;
import com.enterise.web.htmlgen.HtmlPage;
/**
 * Splits the text of a Word (.doc) document into HTML pages of roughly equal size.
 *
 * <p>Paragraphs are read through Apache POI's HWPF API and distributed over
 * {@link WordHtmlPage} instances held by a {@link WordHtmlBuilder}. Page size is
 * measured in characters ({@link String#length()}) of the paragraph text. A page
 * aims at {@code preferWordsNumberPerPage} characters and may deviate from that
 * target by less than {@code preferWordsNumberPerPage * pageWordsNumberChangeRatio}
 * characters in order to keep a paragraph in one piece; otherwise the paragraph is
 * split at the page boundary.
 */
public class Word2Html implements HtmlGenerator {

    /** Source document; never null once the constructor has returned. */
    private final HWPFDocument document;

    /** Target number of characters per generated page (always positive). */
    private final int preferWordsNumberPerPage;

    /** Allowed deviation from the target page size, as a fraction of it. */
    private final float pageWordsNumberChangeRatio;

    /** Collects the generated pages and pictures and writes them out. */
    private final WordHtmlBuilder htmlBuilder;

    /** Number of characters already placed on the builder's current page. */
    private int currentPageWordsNumber = 0;

    /** Paragraphs of the document, in document order. */
    private final List paragraphList;

    /** Not used by this class at present. */
    private HtmlPage htmlPage;

    /** Pictures extracted from the document. */
    private final List pictureList;

    /**
     * Creates a converter with a target of 1000 characters per page and a
     * tolerance ratio of 0.05.
     *
     * @param wordInputStream  stream positioned at the start of a .doc file
     * @param htmlOutputStream currently ignored; output is produced by
     *                         {@code WordHtmlBuilder.writeToFile()}
     * @throws IllegalArgumentException if the stream is null or cannot be read
     *                                  as a Word document
     */
    public Word2Html(InputStream wordInputStream, OutputStream htmlOutputStream) {
        this(wordInputStream, htmlOutputStream, 1000, 0.05f);
    }

    /**
     * Creates a converter with an explicit page size and tolerance.
     *
     * @param wordInputStream            stream positioned at the start of a .doc file
     * @param htmlOutputStream           currently ignored; output is produced by
     *                                   {@code WordHtmlBuilder.writeToFile()}
     * @param preferWordsNumberPerPage   target number of characters per page; must be positive
     * @param pageWordsNumberChangeRatio allowed deviation from the target, as a fraction of it
     * @throws IllegalArgumentException if the stream is null, the page size is not
     *                                  positive, or the document cannot be read
     */
    public Word2Html(InputStream wordInputStream, OutputStream htmlOutputStream,
            int preferWordsNumberPerPage, float pageWordsNumberChangeRatio) {
        if (wordInputStream == null) {
            throw new IllegalArgumentException("wordInputStream must not be null");
        }
        if (preferWordsNumberPerPage <= 0) {
            throw new IllegalArgumentException(
                    "preferWordsNumberPerPage must be positive: " + preferWordsNumberPerPage);
        }
        this.preferWordsNumberPerPage = preferWordsNumberPerPage;
        this.pageWordsNumberChangeRatio = pageWordsNumberChangeRatio;
        try {
            document = new HWPFDocument(wordInputStream);
        } catch (IOException e) {
            // Fail here with the real cause instead of leaving 'document' null
            // and hitting a NullPointerException later in generate().
            throw new IllegalArgumentException("Unable to read Word document from input stream", e);
        }
        htmlBuilder = new WordHtmlBuilder();
        // The builder always needs a current page to append to.
        htmlBuilder.addHtmlPage(new WordHtmlPage());
        paragraphList = new ArrayList();
        pictureList = new ArrayList();
    }

    /**
     * Reads all paragraphs and pictures from the document, paginates the
     * paragraph text, hands the pictures to the builder and lets the builder
     * write the result.
     */
    public void generate() {
        Range range = document.getRange();
        int paragraphCount = range.numParagraphs();
        for (int i = 0; i < paragraphCount; i++) {
            paragraphList.add(range.getParagraph(i));
        }
        processParagraphs();

        PicturesTable picturesTable = document.getPicturesTable();
        pictureList.addAll(picturesTable.getAllPictures());
        htmlBuilder.addPictures(pictureList);
        htmlBuilder.writeToFile();
    }

    /** Returns the target page size minus the rounded tolerance. */
    private int getMinPageWordsNumber() {
        return preferWordsNumberPerPage
                - Math.round(preferWordsNumberPerPage * pageWordsNumberChangeRatio);
    }

    /** Returns the target page size plus the rounded tolerance. */
    private int getMaxPageWordsNumber() {
        return preferWordsNumberPerPage
                + Math.round(preferWordsNumberPerPage * pageWordsNumberChangeRatio);
    }

    /** Paginates every collected paragraph in document order. */
    private void processParagraphs() {
        Iterator iter = paragraphList.iterator();
        while (iter.hasNext()) {
            processParagraph((Paragraph) iter.next());
        }
    }

    /**
     * Places the text of one paragraph on the current page, on a new page, or
     * split across pages, depending on how much room the current page has left.
     *
     * @param paragraph the paragraph whose text is to be placed
     */
    private void processParagraph(Paragraph paragraph) {
        String remaining = paragraph.text();
        // Compare in floating point; integer division would truncate the ratio to 0.
        float tolerance = preferWordsNumberPerPage * pageWordsNumberChangeRatio;

        while (true) {
            int countBefore = currentPageWordsNumber;
            int countAfter = countBefore + remaining.length();

            // The text fits on the current page (exactly filling the page counts as fitting).
            if (countAfter <= preferWordsNumberPerPage) {
                appendToCurrentPage(remaining);
                return;
            }

            // The current page is already at or beyond the target (it was allowed
            // to overshoot earlier): nothing more goes on it.
            if (countBefore >= preferWordsNumberPerPage) {
                startNewPage();
                continue;
            }

            int overshoot = countAfter - preferWordsNumberPerPage;   // excess if the text is added
            int undershoot = preferWordsNumberPerPage - countBefore; // room left if it is not; > 0 here

            // Both alternatives keep the page size within the allowed tolerance:
            // keep the paragraph whole and pick the one closer to the target.
            if (overshoot < tolerance && undershoot < tolerance) {
                if (overshoot >= undershoot) {
                    startNewPage();
                }
                appendToCurrentPage(remaining);
                return;
            }

            // Out of tolerance: fill the current page up to the target and carry
            // the rest over to a fresh page. The rest is re-examined by the loop,
            // so a very long paragraph is spread over as many pages as it needs.
            appendToCurrentPage(remaining.substring(0, undershoot));
            remaining = remaining.substring(undershoot);
            startNewPage();
        }
    }

    /** Adds the text to the builder's current page and updates the character count. */
    private void appendToCurrentPage(String text) {
        WordHtmlPage currentPage = (WordHtmlPage) htmlBuilder.getCurrentPage();
        currentPage.addParagraph(text);
        currentPageWordsNumber += text.length();
    }

    /** Makes a fresh, empty page the builder's current page. */
    private void startNewPage() {
        htmlBuilder.addHtmlPage(new WordHtmlPage());
        currentPageWordsNumber = 0;
    }

    /**
     * Converts the file {@code test.doc} in the working directory.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        InputStream is;
        try {
            is = new FileInputStream("test.doc");
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return; // nothing to convert
        }
        try {
            Word2Html word2Html = new Word2Html(is, null);
            word2Html.generate();
        } finally {
            try {
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
完美封装word excel ppt pdf文件解析包,生成手机页面
3星 · 超过75%的资源 需积分: 9 185 浏览量
2012-11-21
21:05:20
上传
评论
收藏 7KB RAR 举报
allenlei2008
- 粉丝: 4
- 资源: 25
最新资源
- HCIP-Datacom笔记 (1).pdf
- yolov5,SSD 可能使用到的一些代码
- bbbbbbbbbbbbbbbbbb
- 安卓逆向学习笔记之Frida Stalker 还原OLLVM AES.docx
- 安卓逆向学习笔记之unicorn来trace还原OLLVM Base64.docx
- 最新版本私钥助记词碰撞器大富豪使用python进行制作通过接口的方式进行验证支持多币种多链多网络一分钟万次验证高出货率
- 介绍离散性制造行业的MES系统流程
- Arduino IDE压缩包版本,2024年4月26日,最新版本
- 基于IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chines微调的中文文本摘要任务源码+数据集
- 自动驾驶-状态估计和定位之直方图滤波(Histogram+Filter)定位应用和源码.pdf
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
- 1
- 2
前往页