maven之读写pdf简单实例(pdfbox与itext)与pdfbox源码解析(访问者模式...--688IT编程网

maven之读写pdf简单实例（pdfbox与itext）与pdfbox源码解析（访问者模式）

记录学习的脚步

本文是用pdfbox读写pdf。但是pdfbox在写pdf的时候对中文的支持不好，会有乱码。我尝试着修改COSString的源码，试了UTF-8、UTF-16BE几种编码，中文输出还是乱码；接着把pdfbox parent的pom中的 <project.build.sourceEncoding>ISO-8859-1</project.build.sourceEncoding> 属性改为UTF-8，还是不行。好吧，能力有限，还是放弃了。

所幸itext对中文的支持还不错，所以使用itext来写pdf。

参考

1、先看pdfbox的读写pdf的代码

产生pdf的 SavePdfDocument.java 类，必要的地方都加了注释：

package com.undergrowth.pdfbox;

import java.io.IOException;

import org.apache.commons.logging.Log;

import org.apache.commons.logging.LogFactory;

import org.apache.pdfbox.cos.COSString;

import org.apache.pdfbox.exceptions.COSVisitorException;

import org.apache.pdfbox.pdmodel.PDDocument;

import org.apache.pdfbox.pdmodel.PDPage;

import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;

import org.apache.pdfbox.pdmodel.font.PDFont;

import org.apache.pdfbox.pdmodel.font.PDType1Font;

/**

* SavePdfDocument类⽤于产⽣pdf⽂档

* @author Administrator

* @date 2014-8-31

* @version 1.0.0

getsavefilename*/

public class SavePdfDocument {

/**

* ⽇志常量

public static final Log Log(SavePdfDocument.class);

/**

* 测试产⽣pdf⽂档

* @param sayWhat 要写⼊到pdf⽂档中的内容

* @param filePath 保存pdf的路径

* @throws IOException

* @throws COSVisitorException

public boolean helloPdf(String sayWhat,String filePath) throws IOException, COSVisitorException{

boolean f=false;

PDDocument document=getPdDocument();

PDPage page=getPdPage();

document.addPage(page);

PDFont font=getFont();

PDPageContentStream contentStream=getPdPageContentStream(document, page);

contentStream.beginText();

contentStream.setFont(font, 20);

/* COSString cosString=new COSString(new Bytes(), "UTF-16BE")); contentStream.drawString("hello world"+"\t");*/

//contentStream.drawString("hello world"+String());

contentStream.drawString(sayWhat);

//关闭页⾯内容流

contentStream.close();

document.save(filePath);

document.close();

logger.info("成功创建pdf");

f=true;

return f;

}

/**

* 获取空的pdf⽂档对象

* @return PDDocument

public PDDocument getPdDocument(){

PDDocument document=new PDDocument();

return document;

}

/**

* 通过⽂件名加载⽂档

* @param fileName

* @return PDDocument

* @throws IOException

public PDDocument getPdDocument(String fileName) throws IOException{

PDDocument document=PDDocument.load(fileName);

return document;

}

/**

* 获取空的pdf页⾯对象

* @return PDPage

public PDPage getPdPage(){

PDPage page =new PDPage();

return page;

}

/**

* 获取海维提卡体

* @return PDFont

public PDFont getFont(){

PDFont font=PDType1Font.HELVETICA_BOLD;

return font;

}

/**

* 获取页⾯内容流向页⾯添加内容

* @param document PDDocument

* @param page PDPage

* @return PDPageContentStream

* @throws IOException

public PDPageContentStream getPdPageContentStream(PDDocument document,PDPage page) throws IOException{ PDPageContentStream contentStream=new PDPageContentStream(document, page);

return contentStream;

}

提取pdf的 PdfTextStripperTest.java

package com.undergrowth.pdfbox;

import java.io.ByteArrayOutputStream;

import java.io.IOException;

import java.io.OutputStreamWriter;

import java.io.Writer;

import org.apache.commons.logging.Log;

import org.apache.commons.logging.LogFactory;

import org.apache.pdfbox.pdmodel.PDDocument;

import org.apache.pdfbox.util.PDFTextStripper;

public class PdfTextStripperTest {

public static Log Log(PdfTextStripperTest.class);

/**

* 获取⽂本提取

* @param document

* @param writer

* @throws IOException

public void getTextStripper(PDDocument document, Writer writer) throws IOException {

PDFTextStripper textStripper = new PDFTextStripper();

textStripper.writeText(document, writer);

}

/**

* 提取⽂本内容

* @param String fileName 加载⽂档的路径

* @return String

* @throws IOException

public String getText(String fileName) throws IOException {

String textString = "";

SavePdfDocument pdfDocument = new SavePdfDocument();

PDDocument document = PdDocument(fileName); //将提取出来的字节流转换为字符流进⾏显⽰

ByteArrayOutputStream out = new ByteArrayOutputStream();

OutputStreamWriter writer = new OutputStreamWriter(out);

getTextStripper(document, writer);

document.close();

out.close();

writer.close();

byte[] con = ByteArray();

textString = new String(con);

log.info("提取的⽂本内容为:"+textString);

return textString;

}

测试类

package com.undergrowth.pdfbox;

import java.io.ByteArrayOutputStream;

import java.io.IOException;

import java.io.UnsupportedEncodingException;

import org.apache.commons.logging.Log;

import org.apache.commons.logging.LogFactory;

import org.apache.pdfbox.exceptions.COSVisitorException;

import junit.framework.Test;

import junit.framework.TestCase;

import junit.framework.TestSuite;

/**

* Unit test for simple App.

public class AppTest

extends TestCase

{

/**

* Create the test case

* @param testName name of the test case

public AppTest( String testName )

{

super( testName );

}

/**

* @return the suite of tests being tested

public static Test suite()

{

return new TestSuite( AppTest.class );

}

/**

* Rigourous Test :-)

* @throws IOException

* @throws COSVisitorException

public void testApp() throws COSVisitorException, IOException

{

SavePdfDocument pdfDocument=new SavePdfDocument();

String filePath="e:\\hello.pdf";

boolean f=pdfDocument.helloPdf(("hello world"), filePath);

* boolean f=pdfDocument.helloPdf(new String("？我".getBytes("UTF-16BE"),"UTF-16BE"), filePath); * System.out.println("我".getBytes("UTF-8"));

System.out.println(new String("我".getBytes("UTF-16BE"), "UTF-16BE"));

assertTrue( f );

filePath="E:\\test11.pdf";

PdfTextStripperTest textStripperTest=new PdfTextStripperTest();

String stripperText = Text(filePath);

assertNotSame(stripperText, "");

}

688IT编程网

maven之读写pdf简单实例(pdfbox与itext)与pdfbox源码解析(访问者模式...

发表评论

推荐文章

java正则表达式选择题

一种基于正则表达式的DBC文件解析及报文分析方法[发明专利]

工龄小数点提取

非零金额正则表达式

提取文本中数字的函数

热门文章

利用正则表达式实现文本数据提取与处理

正则表达式零宽断言详解

文本匹配规则

excel中使用正则

1-31正则表达式

anki之高级筛选

BUAA_OO_2021_第一单元总结

insert语句递增写法

sublime text 3在行前插入递增数字序号的方法

字符串只允许数字和英文的正则

powerbuilder 正则表达式

Shell脚本编写的高级技巧利用正则表达式进行字符串匹配

JAVA正则表达式的三种模式:贪婪,勉强和占有的讨论

go regexp匹配规则

oracle regexp_substr 实现原理

基本的元字符回溯引用和前后查匹配模式

elasticsearch query dsl正则

oracle sql正则表达式

GA-设置目标

仅匹配全角片假名的正则表达式

最新文章

java正则表达式选择题

工龄小数点提取

非零金额正则表达式

提取文本中数字的函数

vue数字相加小数点变长-概述说明以及解释

vue validate 正则验证小数长度

标签列表

688IT编程网

maven之读写pdf简单实例(pdfbox与itext)与pdfbox源码解析(访问者模式...

发表评论

推荐文章

java正则表达式 选择题

一种基于正则表达式的DBC文件解析及报文分析方法[发明专利]

工龄小数点提取

非零金额 正则表达式

提取文本中数字的函数

热门文章

利用正则表达式实现文本数据提取与处理

正则表达式零宽断言详解

文本匹配规则

excel中使用正则

1-31正则表达式

anki之高级筛选

BUAA_OO_2021_第一单元总结

insert语句递增写法

sublime text 3在行前插入递增数字序号的方法

字符串只允许数字和英文的正则

powerbuilder 正则表达式

Shell脚本编写的高级技巧利用正则表达式进行字符串匹配

JAVA正则表达式的三种模式:贪婪,勉强和占有的讨论

go regexp匹配规则

oracle regexp_substr 实现原理

基本的元字符 回溯引用和前后查 匹配模式

elasticsearch query dsl正则

oracle sql正则表达式

GA-设置目标

仅匹配全角片假名的正则表达式

最新文章

java正则表达式 选择题

工龄小数点提取

非零金额 正则表达式

提取文本中数字的函数

vue数字相加小数点变长-概述说明以及解释

vue validate 正则验证小数长度

标签列表

java正则表达式选择题

非零金额正则表达式

基本的元字符回溯引用和前后查匹配模式

java正则表达式选择题

非零金额正则表达式