PDF.js大部分用法都是基于Promise的,PDFJS.getDocument(url)方法返回的就是一个Promise:
// Load the document; the callback runs once the PDF has been fetched and parsed.
PDFJS.getDocument(url).then(function (pdf) {
  // `pdf` is the loaded document; use it here.
});
PDF的解析工作需要通过pdf.getPage(page)去执行,这个方法返回的也是一个Promise,因此可以去逐页解析PDF:
// Fetch a single page of the loaded document by its page number.
pdf.getPage(pageNumber).then(function (page) {
  // `page` is the parsed page; use it here.
});
渲染页面
每个PDF页面都有它自己的视口(viewport),它定义了以像素为单位的大小(72 DPI)和初始旋转角度。默认情况下,视口会缩放到PDF的原始大小,但是通过修改视口可以更改此行为。创建视口时,还会创建一个初始变换矩阵,它考虑了期望的缩放比例和旋转,并转换坐标系统(PDF文档的(0,0)点在左下角,而画布的(0,0)点在左上角)。
// Render one page into the <canvas id="the-canvas"> element.
// `page` is the page object that pdf.getPage(...) resolves with.
var scale = 1.5;
var viewport = page.getViewport(scale);

// Size the canvas to the scaled viewport so the page is neither clipped
// nor stretched.
var canvas = document.getElementById('the-canvas');
var context = canvas.getContext('2d');
canvas.height = viewport.height;
canvas.width = viewport.width;

// Draw the page into the canvas 2D context.
var renderContext = {
  canvasContext: context,
  viewport: viewport
};
page.render(renderContext);
还可以自定义canvas大小:
// Fit the page to a fixed width: measure it at scale 1, then derive the
// scale factor that produces the desired width.
var desiredWidth = 100;
var viewport = page.getViewport(1);
var scale = desiredWidth / viewport.width;
var scaledViewport = page.getViewport(scale);
官方给出的示例:
// If an absolute URL from a remote server is used, that server must send
// CORS headers.
var url = '//cdn.mozilla.net/pdfjs/helloworld.pdf';

// The worker script location must be set before loading a document.
PDFJS.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';

// Start downloading the PDF; loadingTask.promise resolves with the document.
var loadingTask = PDFJS.getDocument(url);
loadingTask.promise.then(function (pdf) {
  console.log('PDF loaded');

  // Fetch the first page.
  var pageNumber = 1;
  pdf.getPage(pageNumber).then(function (page) {
    console.log('Page loaded');

    var scale = 1.5;
    var viewport = page.getViewport(scale);

    // Prepare the canvas using the PDF page dimensions.
    var canvas = document.getElementById('the-canvas');
    var context = canvas.getContext('2d');
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    // Render the PDF page into the canvas context.
    var renderContext = {
      canvasContext: context,
      viewport: viewport
    };
    var renderTask = page.render(renderContext);
    // Same completion hook as the paging example's renderPage uses.
    renderTask.promise.then(function () {
      console.log('Page rendered');
    });
  });
}, function (reason) {
  // PDF loading error.
  console.error(reason);
});
另外较大的PDF文件可以用base64编码方式加载,例如:
// Binary string of a minimal one-page "Hello, world!" PDF, decoded from the
// base64 chunks below (joined in order before decoding).
var pdfData = atob([
  'JVBERi0xLjcKCjEgMCBvYmogICUgZW50cnkgcG9pbnQKPDwKICAvVHlwZSAvQ2F0YWxvZwog',
  'IC9QYWdlcyAyIDAgUgo+PgplbmRvYmoKCjIgMCBvYmoKPDwKICAvVHlwZSAvUGFnZXMKICAv',
  'TWVkaWFCb3ggWyAwIDAgMjAwIDIwMCBdCiAgL0NvdW50IDEKICAvS2lkcyBbIDMgMCBSIF0K',
  'Pj4KZW5kb2JqCgozIDAgb2JqCjw8CiAgL1R5cGUgL1BhZ2UKICAvUGFyZW50IDIgMCBSCiAg',
  'L1Jlc291cmNlcyA8PAogICAgL0ZvbnQgPDwKICAgICAgL0YxIDQgMCBSIAogICAgPj4KICA+',
  'PgogIC9Db250ZW50cyA1IDAgUgo+PgplbmRvYmoKCjQgMCBvYmoKPDwKICAvVHlwZSAvRm9u',
  'dAogIC9TdWJ0eXBlIC9UeXBlMQogIC9CYXNlRm9udCAvVGltZXMtUm9tYW4KPj4KZW5kb2Jq',
  'Cgo1IDAgb2JqICAlIHBhZ2UgY29udGVudAo8PAogIC9MZW5ndGggNDQKPj4Kc3RyZWFtCkJU',
  'CjcwIDUwIFRECi9GMSAxMiBUZgooSGVsbG8sIHdvcmxkISkgVGoKRVQKZW5kc3RyZWFtCmVu',
  'ZG9iagoKeHJlZgowIDYKMDAwMDAwMDAwMCA2NTUzNSBmIAowMDAwMDAwMDEwIDAwMDAwIG4g',
  'CjAwMDAwMDAwNzkgMDAwMDAgbiAKMDAwMDAwMDE3MyAwMDAwMCBuIAowMDAwMDAwMzAxIDAw',
  'MDAwIG4gCjAwMDAwMDAzODAgMDAwMDAgbiAKdHJhaWxlcgo8PAogIC9TaXplIDYKICAvUm9v',
  'dCAxIDAgUgo+PgpzdGFydHhyZWYKNDkyCiUlRU9G'
].join(''));
// The worker script location must be set before loading a document.
PDFJS.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';

// Passing {data: ...} loads the document from the in-memory bytes in pdfData
// instead of fetching a URL.
var loadingTask = PDFJS.getDocument({data: pdfData});
loadingTask.promise.then(function (pdf) {
  console.log('PDF loaded');

  // Fetch the first page.
  var pageNumber = 1;
  pdf.getPage(pageNumber).then(function (page) {
    console.log('Page loaded');

    var scale = 1.5;
    var viewport = page.getViewport(scale);

    // Prepare the canvas using the PDF page dimensions.
    var canvas = document.getElementById('the-canvas');
    var context = canvas.getContext('2d');
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    // Render the PDF page into the canvas context.
    var renderContext = {
      canvasContext: context,
      viewport: viewport
    };
    var renderTask = page.render(renderContext);
    // Same completion hook as the paging example's renderPage uses.
    renderTask.promise.then(function () {
      console.log('Page rendered');
    });
  });
}, function (reason) {
  // PDF loading error.
  console.error(reason);
});
pdf翻页处理:
// If absolute URL from the remote server is provided, configure the CORS
// header on that server.
var url = '//cdn.mozilla.net/pdfjs/tracemonkey.pdf';

// The workerSrc property shall be specified.
PDFJS.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';

// Paging state shared by renderPage, queueRenderPage, onPrevPage and
// onNextPage.
var pdfDoc = null,            // loaded document; null until loading completes
    pageNum = 1,              // page currently shown
    pageRendering = false,    // true while a render is in progress
    pageNumPending = null,    // page requested while a render was in progress
    scale = 0.8,
    canvas = document.getElementById('the-canvas'),
    ctx = canvas.getContext('2d');
/**
 * Get page info from document, resize canvas accordingly, and render page.
 *
 * Sets pageRendering while the work is in progress. When the render
 * finishes, any page number stored in pageNumPending is rendered next and
 * the pending slot is cleared.
 *
 * @param num Page number.
 */
function renderPage(num) {
  pageRendering = true;
  // Fetch the page asynchronously.
  pdfDoc.getPage(num).then(function (page) {
    var viewport = page.getViewport(scale);
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    // Render the PDF page into the canvas context.
    var renderContext = {
      canvasContext: ctx,
      viewport: viewport
    };
    var renderTask = page.render(renderContext);

    // Wait for rendering to finish.
    renderTask.promise.then(function () {
      pageRendering = false;
      if (pageNumPending !== null) {
        // A new page was requested while this one was rendering.
        renderPage(pageNumPending);
        pageNumPending = null;
      }
    });
  });
}
/**
 * Renders the given page immediately when no render is in progress;
 * otherwise records it in pageNumPending so it can be rendered later.
 * @param num Page number.
 */
function queueRenderPage(num) {
  if (!pageRendering) {
    renderPage(num);
    return;
  }
  pageNumPending = num;
}
/**
 * Steps back one page and queues it for rendering.
 * Does nothing when the current page number is 1 or lower.
 */
function onPrevPage() {
  var canGoBack = !(pageNum <= 1);
  if (canGoBack) {
    --pageNum;
    queueRenderPage(pageNum);
  }
}
/**
 * Advances one page and queues it for rendering.
 * Does nothing when no document has been loaded yet, or when the current
 * page is already the last one.
 */
function onNextPage() {
  // pdfDoc is initialised to null and only assigned once the document has
  // loaded; reading numPages before that would throw a TypeError.
  if (!pdfDoc || pageNum >= pdfDoc.numPages) {
    return;
  }
  pageNum++;
  queueRenderPage(pageNum);
}
// Asynchronously download the PDF, keep the document for the paging
// handlers, and render the initial page.
PDFJS.getDocument(url).then(function (pdfDoc_) {
  pdfDoc = pdfDoc_;
  renderPage(pageNum);
});
关于page方法的使用:
getPage返回的解析结果是一个page对象,我们可以看下这个对象提供的方法:
方法 | 返回
getAnnotations | A promise that is resolved with an {Array} of the annotation objects.
getTextContent | A promise that is resolved with a TextContent object that represents the page text content.
getViewport | Contains 'width' and 'height' properties along with transforms required for rendering.
render | An object that contains the promise, which is resolved when the page finishes rendering.
我们可以试试调用getTextContent方法,并将其结果打印出来:
// Extract the text of the page and print the resulting TextContent object.
page.getTextContent().then(function (textContent) {
  console.log(textContent);
});
输出格式大致如下:
{
"items": [
{
"str": "xxx",
"dir": "xxx",
"width": xxx,
"height": xxx,
"transform": [
48,
0,
0,
48,
45.32495,
679.04
],
"fontName": "g_d0_f1"
},
{
"str": " ",
"dir": "ltr",
"width": 9.600000000000001,
"height": 2304,
"transform": [
48,
0,
0,
48,
285.325,
679.04
],
"fontName": "g_d0_f2"
}
],
"styles": {
"g_d0_f1": {
"fontFamily": "monospace",
"ascent": 1.05810546875,
"descent": -0.26171875,
"vertical": false
},
"g_d0_f2": {
"fontFamily": "sans-serif",
"ascent": 0.74365234375,
"descent": -0.25634765625
}
}
}
PDF.js能将每页文本的字符串、位置、字体都解析出来。
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论