Skip to content

Instantly share code, notes, and snippets.

@manjeshpv
Created June 1, 2021 02:58
Show Gist options
  • Save manjeshpv/e7a4d4a8ed42a1290b86538d3b597336 to your computer and use it in GitHub Desktop.
Save manjeshpv/e7a4d4a8ed42a1290b86538d3b597336 to your computer and use it in GitHub Desktop.
Cloud Function to Extract Text from Image - works in server hosted
/**
* sudo yum install tesseract-ocr
*
npm init
npm i express node-tesseract-ocr
SFS_FOLDER=/mnt/data node function-ocr.js
SFS_FOLDER=/mnt/data pm2 start function-ocr.js --name function-ocr
*
curl http://localhost:9001/functions/ocr?object=bucket/image.jpg
curl http://minio.domain.com/functions/ocr?object=bucket/image.jpg
*
server {
server_name minio.domain.com;
location /functions {
proxy_pass http://127.0.0.1:9001;
}
}
*
*/
const http = require('http');
const path = require('path');
const express = require('express');
const tesseract = require("node-tesseract-ocr")
// TCP port to listen on; overridable through the PORT environment variable.
const port = process.env.PORT || 9001;
// Bind address: all IPv4 interfaces.
const ip = '0.0.0.0';
// Directory that the `object` query value of /functions/ocr is resolved against.
// SFS_FOLDER is the variable this script has always read; MINIO_FOLDER is
// accepted as a fallback for deployments that export that name instead.
const FILES_ROOT = process.env.SFS_FOLDER || process.env.MINIO_FOLDER;
// Express application and the HTTP server that serves it.
const app = express();
const server = http.createServer(app);
/**
 * Binds the shared HTTP `server` to `ip`:`port`.
 *
 * @returns {Promise<void>} Resolves after the server is listening and
 *   `appStarted` has been emitted on `app`; rejects with the server's
 *   'error' event payload when the bind fails.
 */
function startServer() {
  console.log('startServer...');
  return new Promise((resolve, reject) => {
    // The listen() callback is only a 'listening' listener and is never
    // handed an error. Bind failures such as EADDRINUSE are delivered as an
    // 'error' event, so that is what must reject the promise.
    const onStartupError = (err) => reject(err);
    server.once('error', onStartupError);
    server.listen(port, ip, () => {
      // Startup succeeded: stop routing later server errors into a promise
      // that has already settled.
      server.removeListener('error', onStartupError);
      app.emit('appStarted');
      resolve();
    });
  });
}
/**
 * GET /functions/ocr?object=<bucket>/<file>
 *
 * Runs Tesseract (English, oem 1, psm 3) on the file named by the `object`
 * query parameter, resolved inside FILES_ROOT.
 *
 * Responses:
 *   200 { text }   recognized text
 *   400 { error }  `object` is missing, not a plain string, contains
 *                  disallowed characters, or points outside FILES_ROOT
 *   500 <err>      recognition (or path resolution) failed; the caught error
 *                  is serialized as-is
 */
app.get('/functions/ocr', async (req, res) => {
  const { object } = req.query;
  console.log('ocr', object);

  // Express parses repeated or bracketed query keys into arrays/objects, and
  // an absent key is undefined; only a non-empty string can name a file.
  if (typeof object !== 'string' || object.length === 0) {
    return res.status(400).json({ error: 'Query parameter "object" is required' });
  }

  // NOTE(review): node-tesseract-ocr appears to interpolate the input path
  // into a double-quoted shell command - confirm against the installed
  // version. Until then, refuse characters that are special inside double
  // quotes, plus control characters.
  if (/[\u0000-\u001f"$\\`]/.test(object)) {
    return res.status(400).json({ error: 'Invalid object path' });
  }

  try {
    const root = path.resolve(FILES_ROOT);
    const rootPrefix = root.endsWith(path.sep) ? root : `${root}${path.sep}`;
    // join() treats `object` as relative even with a leading slash and
    // collapses "..", so the prefix check below catches traversal attempts.
    const imagePath = path.join(root, object);
    if (!imagePath.startsWith(rootPrefix)) {
      return res.status(400).json({ error: 'Invalid object path' });
    }

    const text = await tesseract.recognize(imagePath, { lang: "eng", oem: 1, psm: 3 });
    return res.json({
      text,
    });
  } catch (err) {
    console.error(err);
    return res.status(500).json(err);
  }
});
// Log the port and Express environment once 'appStarted' is emitted on the app.
app.on('appStarted', () => {
  console.log('API: Express server listening on %d, in %s mode', port, app.get('env'));
});

// startServer() returns a promise; attach a rejection handler so it is not
// left floating. If startup is reported as failed, log it and make the
// process exit with a non-zero status.
startServer().catch((err) => {
  console.error('API: failed to start server', err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment