Word 解析为 HTML 文本并替换内容图片(doc、docx)。我们的需求是上传 Word 文件,并把 Word 内容返回到当前页面的富文本框。网上看了很多资料,也试过直接用 POI 解析,
感觉直接解析就是个坑,Word 的各种格式够玩一年的……
这里的做法是先把 Word 转成 HTML 再读取,主要代码如下:
public static String docToHtml(File file) throws Exception {
HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(file));
Document document = wInstance().newDocumentBuilder().newDocument();
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
// 保存图⽚,并返回图⽚的相对路径html内容文本框
wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
File imgfile = new File(name);
OutputStream output = null;
BufferedOutputStream bufferedOutput = null;
try {
output = new FileOutputStream(imgfile);
bufferedOutput = new BufferedOutputStream(output);
bufferedOutput.write(content);
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
bufferedOutput.close();
output.close();
} catch (Exception e) {
e.printStackTrace();
}
}
String url = OSSUnit.uploadObject2OSS(imgfile);
imgfile.delete();
return url;
});
wordToHtmlConverter.processDocument(wordDocument);
Document htmlDocument = Document();
DOMSource domSource = new DOMSource(htmlDocument);
File htmlFile = new UserfilesBaseDir()+IdStr()+".html");
StreamResult streamResult = new StreamResult(htmlFile);
TransformerFactory tf = wInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
String sCurrentLine="";
String sTotalString="";
java.io.BufferedReader l_reader = new java.io.BufferedReader(
new java.io.InputStreamReader(new FileInputStream(htmlFile)));
while ((sCurrentLine = adLine()) != null) {
sTotalString += sCurrentLine ;//+ "/r/n"
System.out.println(sCurrentLine);
}
l_reader.close();
htmlFile.delete();
return sTotalString;
}
/**
 * Converts a .docx file to HTML.
 *
 * <p>As written, the method opens the file as an XWPFDocument, builds XHTMLOptions whose
 * URI resolver and image extractor both point at the same image folder, then reads an
 * HTML file line by line into a single string (lines joined with no separator, each line
 * echoed to stdout), deletes that file, and passes the string to {@code tReImg} with "1".
 *
 * <p>NOTE(review): this block appears to have been damaged when the text was extracted:
 * several calls look truncated ("ate()", "adLine()", "UserfilesBaseDir()", "IdStr()",
 * "tReImg(...)"), two "//" comment markers are split across lines, and the expression
 * assigned to imageFolderFile is not valid Java. No statement here writes the HTML file
 * that is read back, and the method has no return statement although it is declared to
 * return String. Restore from the original project sources before use.
 *
 * @param file the .docx file to convert
 * @return presumably the converted HTML after image replacement - TODO confirm; the
 *         visible code never returns a value
 * @throws Exception on any I/O or conversion failure
 */
public static String docxToHtml(File file) throws Exception {
// OutputStreamWriter outputStreamWriter = null;
try {
XWPFDocument document = new XWPFDocument(new FileInputStream(file));
// XHTMLOptions options = ate();
// 2) Prepare XHTML options (here we set the IURIResolver to
// load images from a "word/media" folder)
// NOTE(review): the next statement is garbled (unbalanced parenthesis, "new" applied to a
// call); presumably it built a File for the sub-folder "11" under the user-files base
// directory - confirm.
File imageFolderFile = new UserfilesBaseDir()+"11");
// Resolve image URIs against, and extract images into, the same folder.
XHTMLOptions options = ate().URIResolver(
new FileURIResolver(imageFolderFile));
options.setExtractor(new FileImageExtractor(imageFolderFile));
// NOTE(review): the next two lines look like a "//" comment marker split across lines
// (the text reads "folder where the images are stored"); as written they do not compile.
/
/ 存放图⽚的⽂件夹
// options.setExtractor(new FileImageExtractor(new UserfilesBaseDir())));
// Path of the images inside the HTML
// options.URIResolver(new BasicURIResolver("image"));
String descFileName = UserfilesBaseDir()+IdStr()+".html";
File htmlFile = new File(descFileName);
// NOTE(review): the writer below is commented out and no conversion call is visible, so
// nothing in this method writes htmlFile before it is read - confirm what was removed.
// outputStreamWriter = new OutputStreamWriter(new FileOutputStream(htmlFile), "utf-8");
String sCurrentLine="";
String sTotalString="";
// Reads with the platform default charset (no charset is passed to InputStreamReader).
java.io.BufferedReader l_reader = new java.io.BufferedReader(
new java.io.InputStreamReader(new FileInputStream(htmlFile)));
// NOTE(review): "adLine()" is presumably a truncated l_reader.readLine() - confirm.
while ((sCurrentLine = adLine()) != null) {
sTotalString += sCurrentLine ;//+ "/r/n"
System.out.println(sCurrentLine);
}
l_reader.close();
htmlFile.delete();
// NOTE(review): the result of this call is discarded and nothing is returned; presumably
// this was "return <helper>(sTotalString, "1")" - confirm the helper name and its contract.
tReImg(sTotalString,"1");
} finally {
// if (outputStreamWriter != null) {
// NOTE(review): another "//" marker split across lines; not valid Java as written.
/
/ outputStreamWriter.close();
// }
}
}
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论