Skip to content

Instantly share code, notes, and snippets.

@hubgit
Last active January 12, 2024 11:39
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save hubgit/600ec0c224481e910d2a0f883a7b98e3 to your computer and use it in GitHub Desktop.
Save hubgit/600ec0c224481e910d2a0f883a7b98e3 to your computer and use it in GitHub Desktop.
Render the text of a PDF with PDF.js
<!doctype html>
<meta charset="utf-8">
<title>Render the text of a PDF with PDF.js</title>
<style>
/* One box per PDF page. The script sets its width and height inline from the
   page viewport; `position: relative` makes it the containing block for the
   absolutely positioned text spans inside it. */
.page-container {
  box-shadow: 0 1px 3px #444;
  position: relative;
  /* Base values only: every text span sets its own font-size inline, and the
     inherited line-height of 1 keeps each line box as tall as its font size. */
  font-size: 1px;
  line-height: 1;
}

/* Text runs extracted from the PDF. Scoped to the page container so that
   unrelated spans elsewhere in the document are not absolutely positioned. */
.page-container span {
  position: absolute;
  cursor: text;
  /* Preserve the exact whitespace of the extracted string. */
  white-space: pre;
  /* The inline scaleX() transform stretches each run from its left edge. */
  transform-origin: left bottom;
}
</style>
<body>
<script src="https://mozilla.github.io/pdf.js/build/pdf.js"></script>
<script>
// Global pdf.js options. This script block runs before the one that calls
// PDFJS.getDocument, so the flags are in place when loading starts.
// NOTE(review): presumably this makes pdf.js do its parsing without a separate
// web worker, which is why no workerSrc is configured below; confirm against
// the pdf.js build that is actually loaded.
PDFJS.disableWorker = true
// Alternative setup kept for reference: a locally installed worker script and
// packed character maps (paths assume a bower install of pdfjs-dist).
// PDFJS.workerSrc = 'bower_components/pdfjs-dist/build/pdf.worker.js'
// PDFJS.cMapUrl = 'bower_components/pdfjs-dist/cmaps'
// PDFJS.cMapPacked = true
// NOTE(review): presumably turns off HTTP range requests so the file is
// fetched in one piece; verify against the pdf.js documentation.
PDFJS.disableRange = true
// PDFJS.disableStream = true
</script>
<script>
/**
 * Compute how one PDF text item should be laid out as an HTML span.
 *
 * @param {number[]} tx - Six-element transform matrix of the text item in
 *   viewport coordinates (already flipped to a top-left origin).
 * @param {{ascent: number, descent: number}} style - Font style entry of the
 *   text item; only `ascent` and `descent` are read here.
 * @param {number} targetWidth - Width in CSS pixels the text should occupy.
 *   A value of 0 or less means "unknown" and disables horizontal stretching.
 * @param {function(number): number} measureWidth - Callback that receives the
 *   font size in pixels and returns the natural rendered width of the text.
 * @returns {{fontSize: number, scaleX: number, left: number, top: number}}
 */
function computeTextLayout(tx, style, targetWidth, measureWidth) {
  // The length of the matrix's vertical basis vector is the rendered font height.
  var fontSize = Math.sqrt((tx[2] * tx[2]) + (tx[3] * tx[3]));

  // Shift the span upwards to adjust for the font's ascent/descent.
  var top = tx[5];
  if (style.ascent) {
    top -= fontSize * style.ascent;
  } else if (style.descent) {
    top -= fontSize * (1 + style.descent);
  } else {
    top -= fontSize / 2;
  }

  // Stretch factor that makes the browser-rendered text as wide as the PDF
  // says it is. It stays at 1 (no stretching) whenever either width is
  // unusable, so a raw matrix component can never leak into scaleX().
  var scaleX = 1;
  if (targetWidth > 0) {
    // Measure at the same font size the span is drawn with.
    var naturalWidth = measureWidth(fontSize);
    if (naturalWidth > 0) {
      scaleX = targetWidth / naturalWidth;
    }
  }

  return { fontSize: fontSize, scaleX: scaleX, left: tx[4], top: top };
}

/**
 * Create the span for a single text item and append it to its page container.
 *
 * @param {Object} textItem - Item from `textContent.items` (`str`, `transform`,
 *   `width` and `fontName` are used).
 * @param {Object} style - Matching entry from `textContent.styles`.
 * @param {Object} viewport - Page viewport (`transform` and `scale` are used).
 * @param {CanvasRenderingContext2D} ctx - Scratch context used to measure text.
 * @param {HTMLElement} pageContainer - Element that receives the span.
 */
function renderTextItem(textItem, style, viewport, ctx, pageContainer) {
  // Map the item into viewport space, then flip the y axis so that the
  // coordinates grow downwards like CSS `top`.
  var tx = PDFJS.Util.transform(
    PDFJS.Util.transform(viewport.transform, textItem.transform),
    [1, 0, 0, -1, 0, 0]
  );

  var layout = computeTextLayout(
    tx,
    style,
    textItem.width * viewport.scale,
    function (fontSize) {
      ctx.font = fontSize + 'px ' + style.fontFamily;
      return ctx.measureText(textItem.str).width;
    }
  );

  var item = document.createElement('span');
  item.textContent = textItem.str;
  item.style.fontFamily = style.fontFamily;
  item.style.fontSize = layout.fontSize + 'px';
  item.style.transform = 'scaleX(' + layout.scaleX + ')';
  item.style.left = layout.left + 'px';
  item.style.top = layout.top + 'px';
  pageContainer.appendChild(item);
}

/**
 * Size a page container and fill it with the text of one PDF page.
 * Only text is rendered; the page's graphics are not drawn.
 *
 * @param {Object} page - Page object resolved by `pdf.getPage`.
 * @param {HTMLElement} pageContainer - Container already attached to the document.
 * @param {CanvasRenderingContext2D} ctx - Scratch context used to measure text.
 * @returns {Promise} Settles once every text item of the page has been appended.
 */
function renderPage(page, pageContainer, ctx) {
  var viewport = page.getViewport(1.5);
  pageContainer.style.width = viewport.width + 'px';
  pageContainer.style.height = viewport.height + 'px';

  return page.getTextContent({ normalizeWhitespace: true }).then(function (textContent) {
    textContent.items.forEach(function (textItem) {
      renderTextItem(
        textItem,
        textContent.styles[textItem.fontName],
        viewport,
        ctx,
        pageContainer
      );
    });
  });
}

/** Log a loading or rendering failure instead of dropping it silently. */
function reportPdfError(error) {
  console.error('Failed to render PDF text:', error);
}

/**
 * Attach the container for one page right away and fill it in asynchronously.
 *
 * @param {Object} pdf - Document object resolved by `PDFJS.getDocument`.
 * @param {number} pageNumber - 1-based page number.
 * @param {CanvasRenderingContext2D} ctx - Scratch context used to measure text.
 * @returns {Promise} Never rejects; failures are reported via `reportPdfError`.
 */
function addPage(pdf, pageNumber, ctx) {
  // Appending synchronously keeps the pages in document order no matter in
  // which order the getPage promises resolve.
  var pageContainer = document.createElement('div');
  pageContainer.classList.add('page-container');
  document.body.appendChild(pageContainer);

  return pdf.getPage(pageNumber).then(function (page) {
    return renderPage(page, pageContainer, ctx);
  }).then(null, reportPdfError);
}

/**
 * Load a PDF and render the text of every page into the document body.
 *
 * NOTE(review): written against the `PDFJS` global and the
 * `page.getViewport(scale)` signature; confirm that the pdf.js build loaded
 * by the page still exposes them.
 *
 * @param {string} url - Location of the PDF file.
 * @returns {Promise} Resolves once every page has been queued for rendering.
 */
function renderPdfText(url) {
  return PDFJS.getDocument(url).then(function (pdf) {
    // A single off-screen canvas context is shared for all text measurements.
    var ctx = document.createElement('canvas').getContext('2d', { alpha: false });
    for (var pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      addPage(pdf, pageNumber, ctx);
    }
  });
}

if (typeof PDFJS === 'undefined') {
  // The library script failed to load (or no longer defines this global).
  console.error('pdf.js is not available: the PDFJS global is undefined.');
} else {
  renderPdfText('https://peerj.com/articles/2548.pdf').then(null, reportPdfError);
}
</script>
@caot
Copy link

caot commented Nov 7, 2020

It renders text only, and the image is ignored.

@erik-engel
Copy link

This is a much appreciated example to what I am currently working on. Thank you! Much appreciated :D

@andreicrnd
Copy link

I was looking for an example with textItem styles for a couple of days, thank you!

@Forceizer
Copy link

I was looking to make a PDF viewer for react-native by extracting this information. Thank you for your example, it will really help me get started!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment