Robert Sachunsky bertsky

Append ?print-pdf to the preview URL (in slide mode), e.g. https://hackmd.io/@bertsky/SkQGVzlUt?print-pdf
Then open that URL in Chrome/Chromium and print it to PDF.

Append ?print-pdf to the preview URL (in slide mode), e.g. https://hackmd.io/@bertsky/SkQGVzlUt?print-pdf
Then open that URL in Chrome/Chromium and print it to PDF.

	#!/bin/bash

	nontext_opts=(
	xmlstarlet ed -N tei=http://www.tei-c.org/ns/1.0
	-d //tei:note
	-d //tei:fw
	-d //tei:table
	-d //tei:figure
	-d //tei:formula
	-d //tei:titlePage

	#!/usr/bin/env python3

	import argparse
	import os
	import sys
	import io
	from functools import reduce
	import json
	import unicodedata

	#!/usr/bin/env python3

	# Dump user metadata of a kraken model file or fix it.

	import click
	import json
	import os

	# Default TensorFlow's native (C++) log threshold to '3' before anything
	# imports TensorFlow; a value already set by the caller is left untouched.
	os.environ.setdefault('TF_CPP_MIN_LOG_LEVEL', '3') # error

	#!/usr/bin/env python3

	import sys
	from lxml import etree as ET

	from ocrd_models.constants import NAMESPACES

	# Add the OAI 2.0 namespace under the 'oai' prefix to the mapping imported
	# from ocrd_models.constants (this mutates that shared dict in place).
	NAMESPACES['oai'] = "http://www.openarchives.org/OAI/2.0/"

	for curie in NAMESPACES:

	# abort as soon as any of the following commands fails
	set -e

	# expose only the first CUDA device (running with multiple devices may fail due to [a recent TensorFlow problem](https://github.com/qurator-spk/eynollah/issues/99))
	export CUDA_VISIBLE_DEVICES=0

	# check we are not running into [this bug](https://github.com/shapely/shapely/issues/1598):
	# import Shapely first, then allocate a tensor on the GPU via Torch
	python3 -c "from shapely.geometry import Polygon; import torch; torch.randn(10).cuda()"

	# validate CUDA support is working in TF and Torch (not an exhaustive test);
	# this command prints whether Torch reports CUDA as available
	python3 -c "import torch; print(torch.cuda.is_available())"

	import os
	import sys
	import re
	import click
	import json
	from time import time
	import flask

	from distutils.spawn import find_executable as which
	import ocrd

	<!doctype html>
	<html lang="en">
	<head>
	<meta charset="utf-8">
	<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">

	<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
	<style type="text/css">
	.gt .diff {
	color: green;

	# Needs OCR-D/core#327 OCR-D/ocrd_olena#10 OCR-D/ocrd_segment#11 bertsky/ocrd_cis
	# Runs a preprocessing and resegmentation workflow for GT annotation,
	# then extracts page images along with JSON descriptions of region polygons and classes;
	# finally, creates a flattened directory under $TARGET.
	# Run: preprocess-ocrd-gt.sh [TARGET-DIRECTORY [METS-FILE]]
	# (default is all METS files anywhere under CWD)

	# output directory: first positional argument, falling back to the path below when unset or empty
	TARGET=${1:-../1000pages-crop-sauvola-denoise-deskew-repair}
	# METS files to process: second positional argument, falling back to every mets.xml found under the current directory
	WORKSPACES=${2:-$(find . -name mets.xml)}