package com.word;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.List;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
;
/**
 * Command-line utility that converts a Word document at a fixed location into HTML.
 *
 * <p>{@code .docx} files are converted with {@link XHTMLConverter}; {@code .doc} files are
 * converted with {@link WordToHtmlConverter} and serialized as GB2312-encoded HTML. Embedded
 * pictures are written into the image directory next to the generated HTML file.
 */
public class Word07 {

    /** Word document that {@link #main(String[])} converts. */
    private static final String SOURCE_PATH = "e:\\test\\1.doc";

    /** Directory that receives the pictures extracted from the document. */
    private static final String IMAGE_DIR = "e://test";

    /** HTML file produced by the conversion. */
    private static final String HTML_PATH = "e:\\test\\1.html";

    /** Charset name used both to serialize the {@code .doc} HTML and to write it to disk. */
    private static final String HTML_ENCODING = "GB2312";

    /**
     * Converts the document at {@code e:\test\1.doc} to {@code e:\test\1.html}.
     *
     * <p>Prints a message and does nothing else when the source file is missing. Files whose
     * extension is neither {@code .doc} nor {@code .docx} (compared case-insensitively) are
     * ignored. Conversion failures are reported by printing the stack trace.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        try {
            File file = new File(SOURCE_PATH);
            if (!file.exists()) {
                System.out.println("Sorry file does not exists!");
                return;
            }
            String fileName = file.getName();
            // ".docx" is tested first; a name ending in ".docx" never ends in ".doc".
            if (hasExtension(fileName, ".docx")) {
                convertDocx(file);
            } else if (hasExtension(fileName, ".doc")) {
                convertDoc(file);
            }
        } catch (IOException | ParserConfigurationException | TransformerException e) {
            e.printStackTrace();
        }
    }

    /**
     * Tells whether {@code fileName} ends with {@code extension}, ignoring case.
     *
     * @param fileName  file name to inspect
     * @param extension extension including the leading dot, e.g. {@code ".doc"}
     * @return {@code true} when the name ends with the extension in any letter case
     */
    private static boolean hasExtension(String fileName, String extension) {
        int offset = fileName.length() - extension.length();
        // regionMatches returns false for a negative offset, i.e. when the name is too short.
        return fileName.regionMatches(true, offset, extension, 0, extension.length());
    }

    /**
     * Converts a {@code .docx} file to an XHTML fragment at {@link #HTML_PATH}, extracting
     * images into {@link #IMAGE_DIR}, then echoes the generated file to standard output.
     *
     * @param source the {@code .docx} file to convert
     * @throws IOException if the source cannot be read or the output cannot be written
     */
    private static void convertDocx(File source) throws IOException {
        try (InputStream inputStream = new FileInputStream(source)) {
            XWPFDocument document = new XWPFDocument(inputStream);

            File imageFolderFile = new File(IMAGE_DIR);
            XHTMLOptions options = XHTMLOptions.create().URIResolver(
                    new FileURIResolver(imageFolderFile));
            options.setExtractor(new FileImageExtractor(imageFolderFile));
            options.setIgnoreStylesIfUnused(false);
            options.setFragment(true);

            // The output stream is closed before the file is read back below, so everything
            // the converter wrote is on disk by then.
            try (OutputStream outputStream = new FileOutputStream(new File(HTML_PATH))) {
                XHTMLConverter.getInstance().convert(document, outputStream, options);
            }
        }

        // NOTE(review): FileReader decodes with the platform default charset; confirm it
        // matches the encoding the XHTML converter writes.
        try (BufferedReader bufferedReader = new BufferedReader(new FileReader(HTML_PATH))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }

    /**
     * Converts a {@code .doc} file to HTML at {@link #HTML_PATH} and writes every embedded
     * picture into {@link #IMAGE_DIR} under its suggested file name.
     *
     * @param source the {@code .doc} file to convert
     * @throws IOException                  if the source cannot be read or a picture cannot be
     *                                      written
     * @throws ParserConfigurationException if no DOM document builder can be created
     * @throws TransformerException         if the HTML DOM cannot be serialized
     */
    private static void convertDoc(File source)
            throws IOException, ParserConfigurationException, TransformerException {
        try (InputStream inputStream = new FileInputStream(source)) {
            HWPFDocument document = new HWPFDocument(inputStream);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType,
                        String suggestedName, float widthInches, float heightInches) {
                    // Nothing is saved here; the pictures are written out in the loop below.
                    // Presumably the returned name becomes the image reference in the HTML.
                    return suggestedName;
                }
            });
            wordToHtmlConverter.processDocument(document);

            List<Picture> pictures = document.getPicturesTable().getAllPictures();
            if (pictures != null) {
                for (Picture picture : pictures) {
                    String imagePath = IMAGE_DIR + "/" + picture.suggestFullFileName();
                    // One stream per picture, closed as soon as the picture is written.
                    try (OutputStream imageStream = new FileOutputStream(imagePath)) {
                        picture.writeImageContent(imageStream);
                    }
                }
            }

            Document htmlDocument = wordToHtmlConverter.getDocument();
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, HTML_ENCODING);
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(outputStream));

            // Decode with the same charset the serializer was told to use; relying on the
            // platform default would corrupt non-ASCII text on systems with another default.
            String content = new String(outputStream.toByteArray(), HTML_ENCODING);
            writeFile(content, HTML_PATH);
        }
    }

    /**
     * Writes {@code content} to the file at {@code path} using GB2312 encoding, replacing any
     * existing file. I/O failures, including failures while closing the file, are reported
     * by printing the stack trace rather than being propagated.
     *
     * @param content text to write
     * @param path    destination file path
     */
    private static void writeFile(String content, String path) {
        // Resources are closed in reverse order: the writer is flushed and closed first,
        // then the underlying file stream.
        try (FileOutputStream fos = new FileOutputStream(new File(path));
                BufferedWriter bw = new BufferedWriter(
                        new OutputStreamWriter(fos, HTML_ENCODING))) {
            bw.write(content);
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }
}