Skip to content

Instantly share code, notes, and snippets.

@andreicrnd
Forked from hubgit/index.html
Created January 18, 2023 14:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andreicrnd/a09d1b848623bd8b85823cb05e66dbaa to your computer and use it in GitHub Desktop.
Save andreicrnd/a09d1b848623bd8b85823cb05e66dbaa to your computer and use it in GitHub Desktop.
Render the text of a PDF with PDF.js
<!doctype html>
<meta charset="utf-8">
<title>Render the text of a PDF with PDF.js</title>
<style>
  /* One container is created per PDF page; the script sets its width and
     height inline to match the page viewport. */
  .page-container {
    box-shadow: 0 1px 3px #444;
    /* Makes the container the positioning context for its text spans. */
    position: relative;
    /* Base size only; every text span sets its own font-size inline. */
    font-size: 1px;
    line-height: 1;
  }

  /* Text runs. Scoped to the page container so that unrelated <span>
     elements elsewhere in the document are not absolutely positioned. */
  .page-container span {
    position: absolute;
    cursor: text;
    /* Keep the whitespace of the extracted string exactly as given. */
    white-space: pre;
    /* The horizontal scale applied by the script grows from the left edge. */
    transform-origin: left bottom;
  }
</style>
<body>
<script src="https://mozilla.github.io/pdf.js/build/pdf.js"></script>
<script>
  // Global PDF.js settings. This script runs before the script that calls
  // PDFJS.getDocument(), so the flags are in place when loading starts.
  // NOTE(review): flag meanings below are taken from their names and the
  // legacy PDF.js settings — confirm against the PDF.js build actually loaded.

  // Active settings.
  PDFJS.disableWorker = true; // presumably: parse on the main thread (no workerSrc is configured)
  PDFJS.disableRange = true;  // presumably: fetch the whole file instead of HTTP range requests

  // Alternatives kept for reference (currently disabled):
  // PDFJS.workerSrc = 'bower_components/pdfjs-dist/build/pdf.worker.js'
  // PDFJS.cMapUrl = 'bower_components/pdfjs-dist/cmaps'
  // PDFJS.cMapPacked = true
  // PDFJS.disableStream = true
</script>
<script>
  // Renders the text of every page of a PDF as absolutely positioned <span>
  // elements. Each page gets its own container <div>, sized to the page
  // viewport and appended to <body> in page order.
  //
  // NOTE(review): this uses the `PDFJS` global and the numeric
  // getViewport(scale) signature; newer PDF.js builds may expose a different
  // API — confirm against the build loaded by the <script src> above.
  (function () {
    var PDF_URL = 'https://peerj.com/articles/2548.pdf';
    var SCALE = 1.5;

    // Off-screen 2D context, used only to measure rendered text widths.
    var measureCtx = document.createElement('canvas').getContext('2d', { alpha: false });

    // Logs a failure instead of letting a rejected promise pass silently.
    function reportError(error) {
      console.error('PDF text rendering failed:', error);
    }

    // Creates one <span> for a text item and places it inside pageContainer.
    //   pageContainer - the page's container element
    //   viewport      - the page viewport (provides `transform` and `scale`)
    //   styles        - textContent.styles, keyed by font name
    //   textItem      - one entry of textContent.items
    function renderTextItem(pageContainer, viewport, styles, textItem) {
      // Combine viewport and item transforms, then flip the y axis.
      var tx = PDFJS.Util.transform(
        PDFJS.Util.transform(viewport.transform, textItem.transform),
        [1, 0, 0, -1, 0, 0]
      );

      // A missing style entry must not abort the rest of the page.
      var style = styles[textItem.fontName] || {};
      var fontFamily = style.fontFamily || 'sans-serif';

      // Font size is the length of the transformed vertical unit vector.
      var fontSize = Math.sqrt((tx[2] * tx[2]) + (tx[3] * tx[3]));

      // Adjust the vertical position for font ascent/descent.
      var top = tx[5];
      if (style.ascent) {
        top -= fontSize * style.ascent;
      } else if (style.descent) {
        top -= fontSize * (1 + style.descent);
      } else {
        top -= fontSize / 2;
      }

      // Horizontal stretch so the rendered run matches the width reported by
      // the PDF. Defaults to 1 (no stretch) when nothing can be measured;
      // the font size is already applied through `font-size`.
      var scaleX = 1;
      if (textItem.width > 0 && fontSize > 0) {
        // Measure at the same size the span is rendered with.
        measureCtx.font = fontSize + 'px ' + fontFamily;
        var measuredWidth = measureCtx.measureText(textItem.str).width;
        if (measuredWidth > 0) {
          scaleX = (textItem.width * viewport.scale) / measuredWidth;
        }
      }

      var item = document.createElement('span');
      item.textContent = textItem.str;
      item.style.fontFamily = fontFamily;
      item.style.fontSize = fontSize + 'px';
      item.style.transform = 'scaleX(' + scaleX + ')';
      item.style.left = tx[4] + 'px';
      item.style.top = top + 'px';
      pageContainer.appendChild(item);
    }

    // Sizes the page's container and fills it with the page's text items.
    // Returns the promise of the text rendering so errors propagate.
    function renderPage(page, pageContainer) {
      var viewport = page.getViewport(SCALE);

      // The class is added only once the size is known, so an unsized
      // placeholder never shows its box shadow.
      pageContainer.classList.add('page-container');
      pageContainer.style.width = viewport.width + 'px';
      pageContainer.style.height = viewport.height + 'px';

      return page.getTextContent({ normalizeWhitespace: true }).then(function (textContent) {
        textContent.items.forEach(function (textItem) {
          renderTextItem(pageContainer, viewport, textContent.styles, textItem);
        });
      });
    }

    // Appends a placeholder for the page right away (keeping DOM order equal
    // to page order regardless of which page resolves first), then renders
    // into it once the page is available.
    function queuePage(pdf, pageNumber) {
      var pageContainer = document.createElement('div');
      document.body.appendChild(pageContainer);

      pdf.getPage(pageNumber).then(function (page) {
        return renderPage(page, pageContainer);
      }).then(undefined, reportError);
    }

    PDFJS.getDocument(PDF_URL).then(function (pdf) {
      for (var pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        queuePage(pdf, pageNumber);
      }
    }).then(undefined, reportError);
  })();
</script>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment