Skip to content

Instantly share code, notes, and snippets.

@zachcp
Created February 14, 2020 12:39
Show Gist options
  • Save zachcp/7e006e9763b27a3db908a62ae1603579 to your computer and use it in GitHub Desktop.
Save zachcp/7e006e9763b27a3db908a62ae1603579 to your computer and use it in GitHub Desktop.
Pocket-to-DevonThink
-- POCKET2DEVONTHINK
-- script imports articles from a local Pocket for Mac app into the inbox of the current Devonthink database
-- script comes with the use-it-like-you-want-to-and-dont-blame-me licence
-- last changed on Jan 5, 2015
-- recent changes: errorhandling; pdfs for pocket records without mime setting; conditional searches
-- USER SETTINGS; please adapt them to your needs
--loop_min/max define the range of pocket articles to be imported
-- 1/10 would, e.g., import the first ten articles you've ever stored in Pocket
-- 1 and, say, 100000 would presumably move all of them into DT
-- ===== User settings =====
-- Import window: the main loop only imports list positions loop_min through loop_max.
-- A small window is handy for importing just a few documents into DT,
-- e.g. for testing or for resuming after errors.
property loop_min : 1
property loop_max : 10000
-- Optional SQL condition that get_articles appends right after "from items".
-- Leave it as "" or use something like " WHERE unique_id='13148'" or
-- " WHERE unique_id>'13148'" (note the leading space); make sure loop_min is small enough.
property where_condition : ""

-- ===== Script information =====
-- Written into the header of the logfile.
property scriptlastchanged : "05.01.2015 10:00"
-- Used to build the logfile name. NOTE(review): property initialisers are evaluated when
-- the script is compiled, so this value is presumably not refreshed on every run.
property timestamp : time of (current date)

-- ===== External tools =====
-- sqlite3 executable; an alternative location would be "/Applications/Sente65.app/Contents/MacOS/sqlite3"
property sqlite_path : "sqlite3"
-- Browser identification string (not referenced by the visible part of this script).
property user_agent : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5"

-- ===== Record / field delimiters =====
-- strRecDelim is built from strEOR, so strEOR has to be declared first.
property strEOR : "<EOR>" & return
property strRecDelim : quote & strEOR & quote
property sFieldDelim : ";; "

-- ===== File system locations (all POSIX paths below the user's home folder) =====
-- Pocket's sqlite database; the quoted variant is the one spliced into the shell command.
property db_path_p : POSIX path of (path to home folder) & "Library/Containers/com.readitlater.PocketMac/Data/Library/Application Support/Pocket/readItLater3.sqlite"
property quoted_db_path_p : quoted form of db_path_p
-- Folder holding Pocket's offline copies; one sub-folder per unique_id.
property offline_path : POSIX path of (path to home folder) & "Library/Containers/com.readitlater.PocketMac/Data/Library/Application Support/Pocket/offline/cache0/RIL_pages/"
-- Staging folder on the Desktop that the offline copies are duplicated into before the import.
property tempfolder_path : POSIX path of (path to home folder) & "Desktop/import_pocket/"

-- ===== URL prefixes (only used by the commented-out Instapaper / Pocket download code) =====
property insta_url_prefix : "http://www.instapaper.com/text?u=http%3A%2F%2F"
property insta_urls_prefix : "http://www.instapaper.com/text?u=https%3A%2F%2F"
property pocket_url_prefix : "http://getpocket.com/a/read/"
## Get the list of articles from the Pocket db and turn it into an array-like list of lists
-- Property initialisers are evaluated when the script is compiled, so a compiled script would
-- reuse the same value (and therefore the same logfile name) on every run.
-- Refresh the value at run time so that each run gets its own logfile.
set timestamp to time of (current date)
-- get_articles accepts an empty condition, so where_condition can be passed on unchanged.
set the_articles_text to get_articles(where_condition)
-- Replace every line break of the kind typed inside the literal below with the two
-- characters backslash + n, so that such line breaks cannot act as record boundaries.
set the_articles_text to replaceString(the_articles_text, {"
"}, "\\n")
-- Records are separated by character id 13 (CR), fields by ";; " (the sqlite separator).
set the_articles to textToTwoDArray(the_articles_text, character id 13, ";; ")
-- Start the logfile with a header describing this run.
writelog("Pocket2Devonthink
---------------
Script last changed: " & scriptlastchanged & "
" & (current date) & "
Pocket articles to create in Devonthink: " & loop_min & " - " & loop_max & "
Condition: " & where_condition, timestamp)
## LOOP through reading list items
-- For every entry of the_articles whose position lies within loop_min..loop_max:
--   1. read the nine metadata fields of the entry,
--   2. copy Pocket's offline folder for the entry to the staging folder on the Desktop,
--   3. create a record in the incoming group of the current DEVONthink database
--      (PDF, formatted note, rich text or bookmark, depending on the metadata),
--   4. set name, creation date, URL and comment of the new record.
-- Problems are collected in itemlog and written to the logfile; error_count is the
-- number of problems encountered (one item can contribute more than one).
-- Side effect: the text item delimiters are left at ", " after the first processed item.
set loop_count to 0
set error_count to 0 -- number is written into log file
repeat with this_article in the_articles
	set loop_count to loop_count + 1
	set itemlog to "" -- written to the logfile at the end of the iteration when errors_raised
	set errors_raised to false -- the current item only goes into the logfile when an error occurred
	log "
################
# " & loop_count & "
"
	# MIN MAX Loop
	if (loop_count ≥ loop_min) and (loop_count ≤ loop_max) then
		set itemlog to "
##" & loop_count & "
"
		set itemlog to itemlog & "Raw data on this article according to pocket database: "
		-- join the fields of this entry with ";; " while appending them to the log text
		set text item delimiters to ";; "
		set itemlog to itemlog & this_article
		set text item delimiters to ", "
		
		# GET METADATA from reading list
		-- metadata_ok guards the rest of the iteration: without complete metadata the
		-- variables below would be undefined (first item) or still hold the values of the
		-- previous article, so the import steps must not run.
		set metadata_ok to true
		try
			set uid to item 1 of this_article
			set item_id to item 2 of this_article
			set url_orig to item 3 of this_article
			set title to item 4 of this_article
			set time_added_pocket to item 5 of this_article
			set time_added to timestamp2appledate(time_added_pocket) -- to date rli_date
			set word_count to item 6 of this_article
			set mime to item 7 of this_article
			set offline_text to item 8 of this_article
			set offline_web to item 9 of this_article
			set itemlog to itemlog & "
loop_count: " & loop_count & ";;
uid: " & uid & ";;
item_id: " & item_id & ";;
url_orig: " & url_orig & ";;
title: " & title & ";;
time_added: " & (time_added as string) & ";;
word_count: " & word_count & ";;
mime: " & mime & ";;
offline_text: " & offline_text & ";;
offline_web: " & offline_web
			log itemlog
		on error errormsg
			set itemlog to itemlog & "
--> Error while analysing the data list for this article. Usually, this is caused by a return character in the title field. Please add this article manually to DT.
" & errormsg
			set errors_raised to true
			set error_count to error_count + 1
			set metadata_ok to false
		end try
		
		if metadata_ok then
			# BUILD IMPORT URLs (they might be used or not further down in this script)
			(*
# Instapaper
set urlshort to remove_http(url_orig) -- instapaper needs http://, https://, ftp:// removed from url
if characters 1 through 6 of url_orig as string = "https:" then
set insta_url to (insta_urls_prefix & urlshort)
else
set insta_url to (insta_url_prefix & urlshort)
end if
# Pocket
set pocket_url to pocket_url_prefix & item_id
*)
			
			# COPY FILE to temporary folder on Desktop
			set this_offlinefolder_path to offline_path & uid & "/"
			set this_tempfolder_path to tempfolder_path & uid & "/"
			try
				set has_local_file to true
				-- both paths are shell-quoted so that folders containing spaces work
				do shell script "ditto " & (quoted form of this_offlinefolder_path) & " " & (quoted form of this_tempfolder_path)
				--should result in something like this: "ditto '/Users/me/Library/Containers/com.readitlater.PocketMac/Data/Library/Application Support/Pocket/offline/cache0/RIL_pages/10001/' '/Users/me/Desktop/import_pocket/10001/'"
				-- /Users/me/Library/Containers/com.readitlater.PocketMac/Data/Library/… appears to be inaccessible via applescript
			on error errormsg
				set has_local_file to false
				set itemlog to itemlog & "
--> apparently no local copy
" & errormsg
				set errors_raised to true
				set error_count to error_count + 1
			end try
			
			# CREATE RECORD in DEVONthink
			set result_record to null
			tell application id "DNtp"
				try
					set location_target to incoming group of current database
					## Using Pocket's offline copies located in
					-- Library/Containers/com.readitlater.PocketMac/Data/Library/Application Support/Pocket/offline/cache0/RIL_pages/
					-- slightly different record creation depending on file type and existence of offline copies
					-- would have been easier to just import text.html, web.html, web.pdf - whichever exists
					if mime = "application/pdf" or url_orig ends with ".pdf" then -- doesn't catch those pdfs w/o mime and url without "pdf" in it
						set local_url to "file://" & this_tempfolder_path & "web.pdf"
						set itemlog to itemlog & "
" & local_url
						set result_record to create PDF document from local_url in location_target
					else if offline_text = "1" then
						-- NOTE(review): this_tempfolder_path already is a POSIX path; the other
						-- branches use it directly - confirm whether "POSIX path of" is needed here
						set local_url to "file://" & (POSIX path of this_tempfolder_path) & "text.html"
						set itemlog to itemlog & "
" & local_url
						set result_record to create formatted note from local_url in location_target
					else if offline_web = "1" then
						set local_url to "file://" & this_tempfolder_path & "web.html"
						set local_path to this_tempfolder_path & "web.html"
						set itemlog to itemlog & "
" & local_url
						set rec1 to import local_path to location_target
						set rec2 to convert record rec1 to rich -- DT doesn't allow to set URL (or rather: it doesn't show up in the address line
						delete record rec1
						set result_record to rec2
					else if not has_local_file then
						-- no offline copy at all: fall back to a plain bookmark
						set result_record to create record with {URL:url_orig, type:bookmark} in location_target
						--else
						--set result_record to create record with {name:"error", plain text:"Something went wrong with this record in the if-mime-then operation\n\n" & itemlog, type:text} in location_target
					end if
				on error errormsg
					set itemlog to itemlog & "
--> Something went wrong while creating this record in Devonthink
" & errormsg
					--writelog(itemlog, timestamp)
					-- best effort: leave a text record containing the log in DT
					try
						set result_record to create record with {name:"error", plain text:itemlog, type:text} in location_target
					end try
					set errors_raised to true
					set error_count to error_count + 1
				end try
				
				# TEST whether record was created
				try
					set record_created to true
					name of result_record -- raises an error if class type is missing value
				on error errormsg
					set record_created to false
					set itemlog to itemlog & "
--> No record created in Devonthink
"
					set errors_raised to true
					set error_count to error_count + 1
				end try
				
				# SET METADATA
				if record_created then
					-- best effort: a failure while setting metadata is ignored
					try
						set this_record to result_record
						tell this_record
							set name to (title)
							set the creation date to time_added
							set URL to url_orig
							set comment to "unique_id::" & uid & "
item_id::" & item_id & "
loop_count::" & loop_count
						end tell
					end try
				end if
				(*
-- local copy looks best in 8 of 10 cases; hence I've out-commented this
## Instapaper - create record by downloading via http://www.instapaper.com/text?u=http(s)%3A%2F%2F
set rec_insta to create formatted note from insta_url in location_target
tell rec_insta
set name to (title & " // insta")
set the creation date to time_added
set URL to url_orig
set comment to "unique_id::" & uid & "\nitem_id::" & item_id & "\nloop_count::" & loop_count
end tell
## Pocket - create record by downloading via getpocket.com/a/read/[item_id]
-- looks best, but unreliable unless invoked in browser on Pocket's website
set rec_pocket to create formatted note from pocket_url in location_target
tell rec_pocket
set name to (title & " // pocket")
set the creation date to time_added
set URL to url_orig
set comment to "unique_id::" & uid & "\nitem_id::" & item_id & "\nloop_count::" & loop_count
end tell
*)
			end tell
		end if
		
		if errors_raised then writelog(itemlog, timestamp)
		log itemlog
	else if (loop_count > loop_max) then
		-- everything beyond loop_max is out of the import window
		exit repeat
	end if
end repeat
## Write the closing summary line to the logfile and open the logfile
writelog(("
" & error_count & " error(s) occured. (" & (current date) & "). Loop_count: " & loop_count), timestamp)
-- same file name as the one writelog builds from the timestamp
set logfile to (path to desktop as string) & "Pocket2Devonthink_Log_" & timestamp & ".txt"
-- the POSIX path is shell-quoted so that a Desktop path containing spaces can be opened
do shell script "open " & quoted form of (POSIX path of logfile)
## SOME FUNCTIONS
## get_articles()
## searches the reference table of pocket db; where-string can be empty; returns list of articles
-- 1. unique_id, 2. item_id, 3. url, 4. title, 5. time_added,
-- 6. word_count, 7. mime (required to identify pdfs via "application/pdf"), 8. offline_text (has text.html), 9. offline_web (has web.html)
-- unique_id is used in file system as well
on get_articles(sql_where)
	-- Queries the items table of the Pocket sqlite database through the sqlite3 command line tool.
	-- sql_where: text appended directly after "from items"; either "" or a condition with a
	--            leading space such as " WHERE unique_id='13148'".
	-- Returns the raw text returned by "do shell script": one row per article with the columns
	-- unique_id, item_id, url, title, time_added, word_count, mime, offline_text, offline_web
	-- separated by ";; ", followed by an extra "<EOR>" + line break column.
	-- Raises whatever error "do shell script" raises when sqlite3 fails.
	log ">>>> GetPocketReferences"
	set sQuery to "select unique_id, item_id, url, title, time_added, word_count, mime, offline_text, offline_web,\"<EOR>
\"
from items" & sql_where & ";"
	-- "quoted form of" keeps single quotes inside sql_where intact; wrapping the statement in
	-- hand-written single quotes let the shell strip them and broke conditions containing
	-- spaces or quotes.
	set sCommand to sqlite_path & " -separator ';; ' " & quoted_db_path_p & " " & quoted form of sQuery
	set sResult to (do shell script sCommand)
	return sResult
end get_articles
on textToTwoDArray(theText, mainDelimiter, secondaryDelimiter)
	-- Splits theText into rows at mainDelimiter and each row into fields at secondaryDelimiter.
	-- Returns a list with one sub-list of text items per row.
	-- The text item delimiters found on entry are put back before returning.
	set savedDelimiters to text item delimiters
	
	-- first pass: cut the text into rows
	set text item delimiters to mainDelimiter
	set rowTexts to text items of theText
	
	-- second pass: cut every row into its fields
	set text item delimiters to secondaryDelimiter
	set rows to {}
	repeat with rowIndex from 1 to (count of rowTexts)
		set end of rows to text items of (item rowIndex of rowTexts)
	end repeat
	
	set text item delimiters to savedDelimiters
	return rows
end textToTwoDArray
on timestamp2appledate(timestamp)
	-- Converts a Unix timestamp (seconds since the epoch) into an AppleScript date.
	-- timestamp: the number of seconds, as text or number; it is spliced into a "date -r" shell call.
	-- Returns a date object holding the local date and time reported by the date command.
	-- Raises the "do shell script" error when the date command rejects the value.
	set h to do shell script "date -r " & timestamp & " \"+%Y %m %d %H %M %S\""
	set mydate to current date
	-- Park the date on the 1st before touching year and month: if today is the 29th-31st,
	-- switching to a shorter month would otherwise overflow into the following month
	-- and the month set here would silently be wrong.
	set day of mydate to 1
	set year of mydate to (word 1 of h as integer)
	set month of mydate to (word 2 of h as integer)
	set day of mydate to (word 3 of h as integer)
	set hours of mydate to (word 4 of h as integer)
	set minutes of mydate to (word 5 of h as integer)
	set seconds of mydate to (word 6 of h as integer)
	return mydate
end timestamp2appledate
on remove_http(url1)
	-- Strips a leading "http://" or "https://" from url1.
	-- Returns the remainder of the URL, or "" when url1 does not start with "http:" / "https:",
	-- when nothing is left after the prefix, or when any error occurs.
	-- "begins with" and "text N thru -1" are used instead of "characters ... as string",
	-- because coercing a character list to a string inserts the current text item delimiters
	-- between the characters (the main loop leaves them at ", ").
	try
		if url1 begins with "https:" then
			set url2 to text 9 thru -1 of url1 -- skip the 8 characters of "https://"
		else if url1 begins with "http:" then
			set url2 to text 8 thru -1 of url1 -- skip the 7 characters of "http://"
		else
			-- unsupported scheme: report it and return the empty string
			log "url1: " & url1
			return ""
		end if
		log url2
		return url2
	on error
		return ""
	end try
end remove_http
on replaceString(theText, oldString, newString)
	-- Replaces every occurrence of oldString in theText with newString.
	-- oldString may be a single text or a list of texts (it is used as text item delimiters).
	-- Returns the resulting text.
	-- The caller's text item delimiters are saved and put back (also when an error occurs),
	-- in the same way textToTwoDArray does, instead of being forced to "".
	set savedDelimiters to AppleScript's text item delimiters
	try
		set AppleScript's text item delimiters to oldString
		set tempList to every text item of theText
		set AppleScript's text item delimiters to newString
		set theText to the tempList as string
	on error errormsg number errornumber
		set AppleScript's text item delimiters to savedDelimiters
		error errormsg number errornumber
	end try
	set AppleScript's text item delimiters to savedDelimiters
	return theText
end replaceString
on writelog(this_message, timestamp)
	-- Appends this_message followed by a return character to the file
	-- "Pocket2Devonthink_Log_<timestamp>.txt" on the Desktop.
	-- this_message: the text to append.
	-- timestamp: the value used in the logfile name.
	-- Logging is best effort: errors are swallowed on purpose so that a logging problem
	-- never aborts the import; the file is closed again in that case if it was opened.
	set the log_file to ((path to desktop) as text) & "Pocket2Devonthink_Log_" & timestamp & ".txt"
	try
		set log_handle to open for access file the log_file with write permission
		-- write UTF-8: without an explicit type the text is written in the primary encoding
		-- and characters outside of it (e.g. in article titles) would be lost
		write (this_message & return) to log_handle starting at eof as «class utf8»
		close access log_handle
	on error
		try
			close access file the log_file
		end try
	end try
end writelog
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment