Skip to content

Instantly share code, notes, and snippets.

@doraTeX
Last active November 22, 2023 02:31
Show Gist options
  • Save doraTeX/6c6419b8256b5636c662279455a5b6d1 to your computer and use it in GitHub Desktop.
Save doraTeX/6c6419b8256b5636c662279455a5b6d1 to your computer and use it in GitHub Desktop.
Original Swift / Objective-C / AppleScriptObjC source code from which ocr.sh (https://gist.github.com/doraTeX/da9a1a26dffbf3fe5d6ec12a9c79267c) was converted
#import <Quartz/Quartz.h>
#import <Vision/Vision.h>
/// Command-line entry point: rasterizes every page of `test.pdf` at 200 dpi on a
/// white background, runs Vision text recognition (Japanese + English) on each
/// page, and prints every recognized line to standard output.
///
/// @param argc Unused.
/// @param argv Unused.
/// @return 0 after all pages were processed, 1 when the PDF could not be opened.
///         Pages that fail to render or recognize are reported on stderr and skipped.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        NSString *target = @"test.pdf";
        CGFloat dpi = 200;

        PDFDocument *doc = [[PDFDocument alloc] initWithURL:[NSURL fileURLWithPath:target]];
        if (doc == nil) {
            // Messaging nil would silently yield a page count of 0 and a "successful" empty run.
            fprintf(stderr, "error: could not open PDF document at %s\n", target.UTF8String);
            return 1;
        }
        NSUInteger pageCount = [doc pageCount];

        // One request is reused for all pages; its completion handler prints the
        // best candidate of each observation. Failures are reported by the caller
        // of -performRequests:error: below, so the handler only deals with results.
        VNRecognizeTextRequest *request = [[VNRecognizeTextRequest alloc] initWithCompletionHandler:^(VNRequest * _Nonnull finishedRequest, NSError * _Nullable error) {
            NSArray<VNRecognizedTextObservation *> *observations = [finishedRequest results];
            for (VNRecognizedTextObservation *observation in observations) {
                NSString *string = [[[observation topCandidates:1] firstObject] string];
                if (string == nil) {
                    // No candidate: UTF8String would be NULL, and puts(NULL) is undefined.
                    continue;
                }
                puts(string.UTF8String);
            }
        }];
        request.recognitionLevel = VNRequestTextRecognitionLevelAccurate;
        request.usesLanguageCorrection = YES;
        request.recognitionLanguages = @[@"ja", @"en"];

        // Pick the newest recognizer revision the running OS offers.
        NSInteger revision;
        if (@available(macOS 13.0, *)) {
            revision = VNRecognizeTextRequestRevision3;
        } else if (@available(macOS 11.0, *)) {
            revision = VNRecognizeTextRequestRevision2;
        } else {
            revision = VNRecognizeTextRequestRevision1;
        }
        request.revision = revision;

        // The scale is page-independent, so compute it once. The division by the
        // backing scale factor mirrors the original formula.
        // NOTE(review): presumably it compensates for -lockFocus rendering at the
        // screen's pixel density; confirm.
        CGFloat backingScale = [[NSScreen mainScreen] backingScaleFactor];
        if (backingScale <= 0) {
            // No main screen (nil receiver returns 0): avoid dividing by zero.
            backingScale = 1.0;
        }
        CGFloat scaleFactor = dpi / (72.0 * backingScale);

        for (NSUInteger i = 0; i < pageCount; i++) {
            // Drain the per-page bitmaps before moving on to the next page.
            @autoreleasepool {
                NSPDFImageRep *pdfImageRep = [NSPDFImageRep imageRepWithData:[[doc pageAtIndex:i] dataRepresentation]];
                if (pdfImageRep == nil) {
                    fprintf(stderr, "warning: could not render page %lu; skipping\n", (unsigned long)(i + 1));
                    continue;
                }

                NSSize originalSize = pdfImageRep.bounds.size;
                NSSize scaledSize = NSMakeSize(originalSize.width * scaleFactor, originalSize.height * scaleFactor);
                NSRect targetRect = NSMakeRect(0, 0, scaledSize.width, scaledSize.height);

                // Draw the page over an opaque white background.
                NSImage *image = [[NSImage alloc] initWithSize:targetRect.size];
                [image lockFocus];
                [[NSColor whiteColor] set];
                [NSBezierPath fillRect:targetRect];
                [pdfImageRep drawInRect:targetRect];
                [image unlockFocus];

                // Retain the CGImage explicitly: it is borrowed from a temporary
                // bitmap rep whose lifetime ARC does not tie to this pointer.
                NSBitmapImageRep *bitmapRep = [NSBitmapImageRep imageRepWithData:[image TIFFRepresentation]];
                CGImageRef cgImage = CGImageRetain([bitmapRep CGImage]);
                if (cgImage == NULL) {
                    fprintf(stderr, "warning: could not rasterize page %lu; skipping\n", (unsigned long)(i + 1));
                    continue;
                }

                NSError *error = nil;
                VNImageRequestHandler *handler = [[VNImageRequestHandler alloc] initWithCGImage:cgImage options:@{}];
                // Check the return value, not the error pointer.
                if (![handler performRequests:@[request] error:&error]) {
                    fprintf(stderr, "warning: text recognition failed on page %lu: %s\n",
                            (unsigned long)(i + 1),
                            error.localizedDescription.UTF8String ?: "unknown error");
                }
                CGImageRelease(cgImage);
            }
        }
        return 0;
    }
}
use framework "Quartz"
use framework "Vision"
-- Shared state for the handlers below.
-- The bridged Objective-C classes are cached in globals so that handlers can
-- refer to them by bare name instead of going through `current application`.
global CA, PDFDocument, NSURL, VNRecognizeTextRequest, VNImageRequestHandler, NSScreen, NSImage, NSPDFImageRep, NSColor, NSBezierPath, NSMutableArray
set CA to current application
set PDFDocument to CA's PDFDocument
set NSURL to CA's NSURL
set VNRecognizeTextRequest to CA's VNRecognizeTextRequest
set VNImageRequestHandler to CA's VNImageRequestHandler
set NSScreen to CA's NSScreen
set NSImage to CA's NSImage
set NSPDFImageRep to CA's NSPDFImageRep
set NSColor to CA's NSColor
set NSBezierPath to CA's NSBezierPath
set NSMutableArray to CA's NSMutableArray
-- `request` is the single text-recognition request reused for every image;
-- `scaleFactor` is written by ocrPDF and ocrImage before they render.
global request, scaleFactor
set request to VNRecognizeTextRequest's alloc's init
-- Accurate recognition with language correction enabled. The recognition
-- languages are not set here; the `ocr` handler sets them per call.
request's setRecognitionLevel:(CA's VNRequestTextRecognitionLevelAccurate)
request's setUsesLanguageCorrection:true
-- Recognizes the text of every page of a PDF file.
-- Each page is drawn onto a white background, scaled by dpi / (72 * backing
-- scale factor of the main screen), and handed to ocrTIFF as TIFF data.
--   filePath: path of the PDF file to read
--   dpi:      target resolution used to compute the scale factor
-- Returns the per-page texts joined with linefeeds, as AppleScript text.
-- Raises an error when the PDF cannot be opened.
on ocrPDF(filePath, dpi)
	set doc to PDFDocument's alloc's initWithURL:(NSURL's fileURLWithPath:filePath)
	-- initWithURL: yields missing value for an unreadable file; fail with a
	-- clear message instead of an opaque error on the next message send.
	if doc is missing value then error ("Could not open PDF document: " & filePath)
	set pageCount to doc's pageCount
	set resultTexts to NSMutableArray's new()
	set scaleFactor to (dpi / (72.0 * (NSScreen's mainScreen's backingScaleFactor)))
	repeat with i from 1 to pageCount
		-- PDFDocument pages are zero-based, the repeat counter is one-based.
		set pdfImageRep to (NSPDFImageRep's imageRepWithData:((doc's pageAtIndex:(i - 1))'s dataRepresentation))
		set pageBounds to pdfImageRep's |bounds|
		set originalWidth to CA's NSWidth(pageBounds)
		set originalHeight to CA's NSHeight(pageBounds)
		set scaledSize to CA's NSMakeSize(originalWidth * scaleFactor, originalHeight * scaleFactor)
		set targetRect to CA's NSMakeRect(0, 0, scaledSize's width, scaledSize's height)
		-- Pass the size directly rather than picking it out of targetRect, so the
		-- code does not depend on how the bridge represents an NSRect.
		set image to (NSImage's alloc's initWithSize:scaledSize)
		-- Draw the page over an opaque white background.
		image's lockFocus()
		NSColor's whiteColor's |set|()
		(NSBezierPath's fillRect:targetRect)
		(pdfImageRep's drawInRect:targetRect)
		image's unlockFocus()
		set tiff to image's TIFFRepresentation
		set ocrText to my ocrTIFF(tiff)
		(resultTexts's addObject:ocrText)
	end repeat
	return (resultTexts's componentsJoinedByString:linefeed) as text
end ocrPDF
-- Recognizes the text of a single image file.
-- The image is redrawn over a white background at a point size equal to its
-- pixel size divided by the main screen's backing scale factor, then handed to
-- ocrTIFF as TIFF data.
--   filePath: path of the image file to read
-- Returns whatever ocrTIFF returns for the flattened image.
on ocrImage(filePath)
	set scaleFactor to NSScreen's mainScreen's backingScaleFactor
	-- Load the file and obtain a bitmap representation of it.
	set loadedImage to (NSImage's alloc's initWithContentsOfFile:filePath)
	set pixelRep to (CA's NSBitmapImageRep's imageRepWithData:(loadedImage's TIFFRepresentation))
	-- Convert the pixel dimensions to points.
	set widthInPoints to (pixelRep's pixelsWide as real) / scaleFactor
	set heightInPoints to (pixelRep's pixelsHigh as real) / scaleFactor
	set sizeInPoints to CA's NSMakeSize(widthInPoints, heightInPoints)
	set drawingRect to CA's NSMakeRect(0, 0, sizeInPoints's width, sizeInPoints's height)
	-- Wrap the bitmap in an image of that point size.
	set sourceImage to (NSImage's alloc's initWithSize:sizeInPoints)
	(sourceImage's addRepresentation:pixelRep)
	-- Flatten onto an opaque white background.
	set flattenedImage to (NSImage's alloc's initWithSize:sizeInPoints)
	flattenedImage's lockFocus()
	NSColor's whiteColor's |set|()
	(NSBezierPath's fillRect:drawingRect)
	(sourceImage's drawInRect:drawingRect)
	flattenedImage's unlockFocus()
	set tiffData to flattenedImage's TIFFRepresentation
	return my ocrTIFF(tiffData)
end ocrImage
-- Runs the shared text-recognition request on TIFF image data.
--   tiff: image data (as produced by NSImage's TIFFRepresentation)
-- Returns the top candidate string of every observation, joined with
-- linefeeds, as AppleScript text; an empty text when nothing was recognized.
-- Raises an error when Vision reports that the request failed.
on ocrTIFF(tiff)
	set resultTexts to NSMutableArray's new()
	-- Pass an empty options dictionary, as the Objective-C and Swift versions do.
	set requestHandler to (VNImageRequestHandler's alloc's initWithData:tiff options:(CA's NSDictionary's dictionary()))
	-- Check the outcome instead of discarding it; otherwise a failed run would
	-- be read back as if it had produced results.
	set {succeeded, requestError} to (requestHandler's performRequests:{request} |error|:(reference))
	if not (succeeded as boolean) then
		if requestError is missing value then
			error "Text recognition failed."
		else
			error ("Text recognition failed: " & (requestError's localizedDescription() as text))
		end if
	end if
	set results to request's results()
	if results is missing value then return ""
	repeat with aResult in results
		-- firstObject yields missing value for an empty candidate list, where
		-- objectAtIndex:0 would raise a range exception.
		set bestCandidate to ((aResult's topCandidates:1)'s firstObject())
		if bestCandidate is not missing value then
			(resultTexts's addObject:(bestCandidate's |string|()))
		end if
	end repeat
	return (resultTexts's componentsJoinedByString:linefeed) as text
end ocrTIFF
-- Entry point: configures the recognition languages on the shared request and
-- dispatches on the file extension.
--   filePath: path of the file to recognize
--   lang:     "ja" selects Japanese plus English; anything else English only
--   dpi:      rendering resolution, used only for PDF input
-- Returns the recognized text from ocrPDF (extension "pdf") or ocrImage.
on ocr(filePath, lang, dpi)
	if lang is not "ja" then
		request's setRecognitionLanguages:["en"]
	else
		request's setRecognitionLanguages:["ja", "en"]
	end if
	set fileExtension to ((CA's NSString's stringWithString:filePath)'s pathExtension as text)
	-- Everything that is not a PDF is treated as a plain image.
	if fileExtension is not "pdf" then return my ocrImage(filePath)
	return my ocrPDF(filePath, dpi)
end ocr
-- Sample invocation: recognize test.pdf with the Japanese + English languages at 200 dpi.
set ocrResult to my ocr("test.pdf", "ja", 200)
import CoreGraphics
import Quartz
import Vision
// Rasterizes every page of `test.pdf` at 200 dpi on a white background, runs
// Vision text recognition (Japanese + English) on each page, and prints the
// recognized lines of each page to standard output. Exits with status 1 when
// the PDF cannot be opened; pages that fail are reported on stderr and skipped.
let target = "test.pdf"
let dpi: CGFloat = 200

/// Writes one diagnostic line to standard error, keeping stdout for OCR text.
func reportError(_ message: String) {
    FileHandle.standardError.write(Data((message + "\n").utf8))
}

// URL(fileURLWithPath:) rather than URL(filePath:): the latter exists only on
// macOS 13+, which would make the older-OS revision fallbacks below pointless.
guard let doc = PDFDocument(url: URL(fileURLWithPath: target)) else {
    reportError("error: could not open PDF document at \(target)")
    exit(1)
}
let pageCount = doc.pageCount

// One request is reused for all pages; its completion handler prints the best
// candidate of every observation, one per line.
let request = VNRecognizeTextRequest { (request, _) in
    let observations = request.results as? [VNRecognizedTextObservation] ?? []
    let lines = observations.map { $0.topCandidates(1).first?.string ?? "" }
    print(lines.joined(separator: "\n"))
}
request.recognitionLevel = .accurate
request.usesLanguageCorrection = true
request.recognitionLanguages = ["ja", "en"]

// Pick the newest recognizer revision the running OS offers.
let revision: Int
if #available(macOS 13.0, *) {
    revision = VNRecognizeTextRequestRevision3
} else if #available(macOS 11.0, *) {
    revision = VNRecognizeTextRequestRevision2
} else {
    revision = VNRecognizeTextRequestRevision1
}
request.revision = revision

// The scale is page-independent, so compute it once. Fall back to 1.0 when
// there is no main screen instead of crashing on a force-unwrap.
let backingScale = NSScreen.main?.backingScaleFactor ?? 1.0
let scaleFactor = dpi / (72.0 * backingScale)

for i in 0..<pageCount {
    // Drain the per-page bitmaps before moving on to the next page.
    autoreleasepool {
        guard let pageData = doc.page(at: i)?.dataRepresentation,
              let pdfImageRep = NSPDFImageRep(data: pageData) else {
            reportError("warning: could not render page \(i + 1); skipping")
            return
        }

        let originalSize = pdfImageRep.bounds.size
        let scaledSize = NSSize(width: originalSize.width * scaleFactor,
                                height: originalSize.height * scaleFactor)
        let targetRect = NSRect(origin: .zero, size: scaledSize)

        // Draw the page over an opaque white background.
        let image = NSImage(size: scaledSize)
        image.lockFocus()
        NSColor.white.set()
        NSBezierPath.fill(targetRect)
        pdfImageRep.draw(in: targetRect)
        image.unlockFocus()

        guard let tiffData = image.tiffRepresentation,
              let cgImage = NSBitmapImageRep(data: tiffData)?.cgImage else {
            reportError("warning: could not rasterize page \(i + 1); skipping")
            return
        }

        do {
            try VNImageRequestHandler(cgImage: cgImage, options: [:]).perform([request])
        } catch {
            reportError("warning: text recognition failed on page \(i + 1): \(error.localizedDescription)")
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment