Java富文本html导出wordzip文件--688IT编程网

Java富文本html导出wordzip文件

Java 富文本导出 word 压缩包

1、场景

文章详情、产品详情等带图和样式的文案数据，在 db 里边存了富文本，需要导出到 word 压缩包。

2、切入点

Q：图片数据要么是网络图片、要么是本地图片，要怎么转到 word 显示呢？

A：base64

Q：样式不是固定模板，怎么绘制成 word 呢？

A：曲线救国，制作 mht 模板，把数据填充到模板里边，再转 word 吐出。

3、开发

1）主要思路

1、制作 mht 文件，观察内容格式，修改加参，读取 mht 模板。

2、从 db 取出富文本内容，分割标签，分别模仿 mht 的编码规律动态拼接 html 非图片数据、图片 base64 字符串、图片的引用位置，然后替换 mht 里边的占位符，得到 mht 字符串。

3、 mht 字符串转换为 word 并打包。

2）实现

1、mht 模板

1）如果懒得自己拼接，可以直接下载博主的。

2）模板制作。用 office 新建一个 doc，写几个文字，和插入一张图片，另存为 mht 尾缀的文件。mht 文件在编码的时候大概的规律：文档的最末尾有一个 xml，里面规定了各种资源的引用位置，在这个 xml 上面就是各种资源的 base64 编码，而实际显示的 body 部分只是做了一个引用而已。

用记事本打开 mht，搜索 image00，分别找到图片的资源引用位置、base64 部分、body 对图片的引用。

图1 资源引用位置

图2 base64 部分

图3 body 对图片的引用

2、占位符替换

分别模仿以上三部分动态拼接图片序号及标签内容，用于等下替换占位符。

// Resource-reference entry for the image: appends one <o:File> element per image,
// named image<i><j>.png. "=3D" is the quoted-printable escape of "=" used in the MHT file.

contentImageRefString.append("\n"+"\n"+"<o:File HRef=3D\"image"+ i + j +".png\"/>");

// Base64 section: appends one MIME part for the image, consisting of the boundary line,
// the Content-Location / Content-Transfer-Encoding / Content-Type headers, an empty
// line, and then the Base64 text held in imgStr.
// NOTE(review): the boundary string and the file:///C:/D125FE07/file8007.files/ prefix are
// hard-coded; presumably they must match the values in the MHT template — verify.

contentImageString.append("\n"+

"\n"+

"------=_NextPart_01D71F2C.734F7C10\n"+

"Content-Location: file:///C:/D125FE07/file8007.files/image"+

i +

j +

".png\n"+

"Content-Transfer-Encoding: base64\n"+

"Content-Type: image/png\n"+

"\n"+

imgStr);

// Reference to the image from the document body: appends a <v:shape> element sized
// imgWidth x imgHeight (in pt) whose <v:imagedata> points at file8007.files/image<i><j>.png.
// The markup is written in quoted-printable form: "=3D" stands for "=", and a trailing
// "=" before "\n" is a soft line break that continues the same logical line.

contentString.append("<v:shape id=3D\"图⽚_x0020_1\" o:spid=3D\"_x0000_i1025\" type=3D\"=\n"+ "#_x0000_t75\"\n"+

" style=3D'width:"+ imgWidth +"pt;height:"+ imgHeight +"pt;visibility:visible;mso-wrap-style:squ=\n"+ "are'>\n"+

" <v:imagedata src=3D\"file8007.files/image"+

i +

j +

".png\" o:title=3D\"\"/>\n"+

"</v:shape>");

修改 mht 中图1、图2、图3 对应的位置，分别加上占位符 ${CONTENTREF}、${CONTENTIMAGE}、${CONTENT}。

3、完整 demo

package exportWordFromUEditor;

import faim.util.FaiList;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Base64;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import sun.misc.BASE64Encoder;

/**
 * @author Aunty Cai
 */

public class Export {

* 读取 mht ⽂件到字符串

* @param filePath

* @return

* @throws Exception

public static String readFile(String filePath)throws Exception {

StringBuffer buffer =new StringBuffer("");

BufferedReader br = null;

try{

br =new BufferedReader(new InputStreamReader(new FileInputStream(filePath),"UTF-8")); buffer =new StringBuffer();

ady()){

buffer.append((char) br.read());

}

}catch(IOException e){

e.printStackTrace();

}finally{

if(br != null){

br.close();

}

String();

}

/**

* 将⽹络图⽚编码为base64

* @param url

* @return

public static String encodeImageToBase64(URL url){

//将图⽚⽂件转化为字节数组字符串，并对其进⾏Base64编码处理

HttpURLConnection conn = null;

try{

conn =(HttpURLConnection) url.openConnection();

conn.setRequestMethod("GET");

conn.setConnectTimeout(5*1000);

InputStream inStream = InputStream();

ByteArrayOutputStream outStream =new ByteArrayOutputStream();

byte[] buffer =new byte[1024];

int len =0;

while((len = ad(buffer))!=-1){

outStream.write(buffer,0, len);

}

inStream.close();

byte[] data = ByteArray();

//对字节数组Base64编码

BASE64Encoder encoder =new BASE64Encoder();

String base64 = de(data);

return base64;

}catch(IOException e){

e.printStackTrace();

System.out.println("将⽹络图⽚编码为base64 异常！e="+ e);

return"";

}

/**

* 截取富⽂本编辑器中完整的<img/>标签，并调⽤⽅法将其替换为可读取的base64字符串填回富⽂本编辑器中 * @param editorContent

* @param template

* @return

java修改html文件

* @throws Exception

public static String addEditorImage2Template(String editorContent, String template)throws Exception {

StringBuffer contentString =new StringBuffer();//存储template主内容

StringBuffer contentImageString =new StringBuffer();//存储template图⽚的base64字符串

StringBuffer contentImageRefString =new StringBuffer();//存储template图⽚的引⽤位置if(editorContent != null){

String[] stringSplit_img = editorContent.split("<img");//分割<img/>标签

for(int i=0; i<stringSplit_img.length; i++){

if(i !=0){

String[] stringSplit_p =("<img"+ stringSplit_img[i]).split("</p>");//分割<p/>标签

for(int j=0; j<stringSplit_p.length; j++){

if(j !=0){

contentString.append("</p>"+ stringSplit_p[j]);

}else{

//此时stringSplit_p[j]已为完整的&/>标签字符串

//截取图⽚资源路径

for(String s : stringSplit_p[j].split(" ")){

if(s.startsWith("src=")){

s = s.replace("src=\"","");

s = s.replace("\"","");

URL imgUrl =new URL(s);

String imgStr =encodeImageToBase64(imgUrl);

//base64 部分

contentImageString.append("\n"+

"\n"+

"------=_NextPart_01D71F2C.734F7C10\n"+

"Content-Location: file:///C:/D125FE07/file8007.files/image"+

i +

j +

".png\n"+

"Content-Transfer-Encoding: base64\n"+

"Content-Type: image/png\n"+

"\n"+

imgStr);

}else if(s.startsWith("style")){

String regex_width ="width:(?<width>\\d+([.]\\d+)?)px;";

String regex_height ="height:(?<height>\\d+([.]\\d+)?)px;";

double imgWidth =0;

double imgHeight =0;

Pattern pattern = Patternpile(regex_width);

Matcher matcher = pattern.matcher(s);

if(matcher.find()){

imgWidth = Double.up("width"));

}

matcher = Patternpile(regex_height).matcher(s);

if(matcher.find()){

imgHeight = Double.up("height"));

}

/pt=px乘以3/4

imgWidth = imgWidth*3/4;

imgHeight = imgHeight*3/4;

//body 对图⽚的引⽤

contentString.append("<v:shape id=3D\"图⽚_x0020_1\" o:spid=3D\"_x0000_i1025\" type=3D\"=\n"+

"#_x0000_t75\"\n"+

" style=3D'width:"+ imgWidth +"pt;height:"+ imgHeight +"pt;visibility:visible;mso-wrap-style:squ=\n"+

"are'>\n"+

" <v:imagedata src=3D\"file8007.files/image"+

i +

j +

".png\" o:title=3D\"\"/>\n"+

"</v:shape>");

//图⽚的资源引⽤位置

contentImageRefString.append("\n"+"\n"+

"<o:File HRef=3D\"image"+ i + j +".png\"/>");

}else ains("</a>")){

contentString.append("</a>");

}

}else{

contentString.append(stringSplit_img[i]);

}

template = place("${CONTENT}", String());

template = place("${CONTENTIMAGE}", String());

template = place("${CONTENTREF}", String());

return template;

}

public static void main(String[] args)throws Exception {

/1.读取mht⽂件获得mht字符串（需传⼊⽂件路径）；

688IT编程网

Java富文本html导出wordzip文件

发表评论

推荐文章

java正则表达式选择题

一种基于正则表达式的DBC文件解析及报文分析方法[发明专利]

工龄小数点提取

非零金额正则表达式

提取文本中数字的函数

热门文章

利用正则表达式实现文本数据提取与处理

正则表达式零宽断言详解

文本匹配规则

excel中使用正则

1-31正则表达式

anki之高级筛选

BUAA_OO_2021_第一单元总结

insert语句递增写法

sublime text 3在行前插入递增数字序号的方法

字符串只允许数字和英文的正则

powerbuilder 正则表达式

Shell脚本编写的高级技巧利用正则表达式进行字符串匹配

JAVA正则表达式的三种模式:贪婪,勉强和占有的讨论

go regexp匹配规则

oracle regexp_substr 实现原理

基本的元字符回溯引用和前后查匹配模式

elasticsearch query dsl正则

oracle sql正则表达式

GA-设置目标

仅匹配全角片假名的正则表达式

最新文章

java正则表达式选择题

工龄小数点提取

非零金额正则表达式

提取文本中数字的函数

vue数字相加小数点变长-概述说明以及解释

vue validate 正则验证小数长度

标签列表

688IT编程网

Java富文本html导出wordzip文件

发表评论

推荐文章

java正则表达式 选择题

一种基于正则表达式的DBC文件解析及报文分析方法[发明专利]

工龄小数点提取

非零金额 正则表达式

提取文本中数字的函数

热门文章

利用正则表达式实现文本数据提取与处理

正则表达式零宽断言详解

文本匹配规则

excel中使用正则

1-31正则表达式

anki之高级筛选

BUAA_OO_2021_第一单元总结

insert语句递增写法

sublime text 3在行前插入递增数字序号的方法

字符串只允许数字和英文的正则

powerbuilder 正则表达式

Shell脚本编写的高级技巧利用正则表达式进行字符串匹配

JAVA正则表达式的三种模式:贪婪,勉强和占有的讨论

go regexp匹配规则

oracle regexp_substr 实现原理

基本的元字符 回溯引用和前后查 匹配模式

elasticsearch query dsl正则

oracle sql正则表达式

GA-设置目标

仅匹配全角片假名的正则表达式

最新文章

java正则表达式 选择题

工龄小数点提取

非零金额 正则表达式

提取文本中数字的函数

vue数字相加小数点变长-概述说明以及解释

vue validate 正则验证小数长度

标签列表

java正则表达式选择题

非零金额正则表达式

基本的元字符回溯引用和前后查匹配模式

java正则表达式选择题

非零金额正则表达式