Skip to content

Instantly share code, notes, and snippets.

xquery version "3.1";
declare namespace output = "http://www.w3.org/2010/xslt-xquery-serialization";
declare option output:method "html5";
declare option output:media-type "text/html";
import module namespace request = "http://exist-db.org/xquery/request";
let $addressee := request:get-parameter("addressee", "World")
xquery version "3.1";
declare variable $local:regexes :=
map {
"month-regex" : "(January|February|March|April|May|June|July|August|September|October|November|December)",
"month-regex-fr" : "(janvier|février|fevrier|mart|avril|mai|juin|juillet|août|aout|septembre|octobre|novembre|décembre|decembre)",
"month-regex-sp" : "(enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|setiembre|octubre|noviembre|diciembre)",
"day-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?",
"day-range-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?\s*[-–—]\s*(\d{1,2})(?:st|d|nd|rd|th)?",
"year-regex" : "(\d{4})",
xquery version "3.1";
declare variable $local:regexes :=
map {
"month-regex" : "(January|February|March|April|May|June|July|August|September|October|November|December)",
"month-regex-fr" : "(janvier|février|fevrier|mart|avril|mai|juin|juillet|août|aout|septembre|octobre|novembre|décembre|decembre)",
"month-regex-sp" : "(enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|setiembre|octubre|noviembre|diciembre)",
"day-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?",
"day-range-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?\s*[-–—]\s*(\d{1,2})(?:st|d|nd|rd|th)?",
"year-regex" : "(\d{4})",
xquery version "3.1";
declare variable $local:regexes :=
map {
"month-regex" : "(January|February|March|April|May|June|July|August|September|October|November|December)",
"month-regex-fr" : "(janvier|février|fevrier|mart|avril|mai|juin|juillet|août|aout|septembre|octobre|novembre|décembre|decembre)",
"month-regex-sp" : "(enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|setiembre|octubre|noviembre|diciembre)",
"day-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?",
"day-range-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?\s*[-–—]\s*(\d{1,2})(?:st|d|nd|rd|th)?",
"year-regex" : "(\d{4})",
xquery version "3.1";
declare variable $local:regexes :=
map {
"month-regex" : "(January|February|March|April|May|June|July|August|September|October|November|December)",
"month-regex-fr" : "(janvier|février|fevrier|mart|avril|mai|juin|juillet|août|aout|septembre|octobre|novembre|décembre|decembre)",
"month-regex-sp" : "(enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|setiembre|octubre|noviembre|diciembre)",
"day-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?",
"day-range-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?\s*[-–—]\s*(\d{1,2})(?:st|d|nd|rd|th)?",
"year-regex" : "(\d{4})",
(: declare namespace dp='https://history.state.gov/ns/xquery/date-processing' :)
declare variable $local:regexes :=
map {
"month-regex" : "(January|February|March|April|May|June|July|August|September|October|November|December)",
"month-regex-fr" : "(janvier|février|fevrier|mart|avril|mai|juin|juillet|août|aout|septembre|octobre|novembre|décembre|decembre)",
"month-regex-sp" : "(enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|setiembre|octubre|noviembre|diciembre)",
"day-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?",
"day-range-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?\s*[-–—]\s*(\d{1,2})(?:st|d|nd|rd|th)?",
"year-regex" : "(\d{4})",
(: declare namespace dp='https://history.state.gov/ns/xquery/date-processing' :)
declare variable $local:regexes :=
map {
"month-regex" : "(January|February|March|April|May|June|July|August|September|October|November|December)",
"month-regex-fr" : "(janvier|février|fevrier|mart|avril|mai|juin|juillet|août|aout|septembre|octobre|novembre|décembre|decembre)",
"month-regex-sp" : "(enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|setiembre|octubre|noviembre|diciembre)",
"day-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?",
"day-range-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?\s*[-–—]\s*(\d{1,2})(?:st|d|nd|rd|th)?",
"year-regex" : "(\d{4})",
@WaxCylinderRevival
WaxCylinderRevival / analyze-text-for-date-patterns.xq
Last active October 30, 2017 04:53
analyze-text-for-date-patterns.xq
(: declare namespace dp='https://history.state.gov/ns/xquery/date-processing' :)
declare variable $local:regexes :=
map {
"month-regex" : "(?:January|February|March|April|May|June|July|August|September|October|November|December)",
"month-regex-fr" : "(?:janvier|février|fevrier|mart|avril|mai|juin|juillet|août|aout|septembre|octobre|novembre|décembre|decembre)",
"month-regex-sp" : "(?:enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|setiembre|octubre|noviembre|diciembre)",
"day-regex" : "(?:\d{1,2})(?:st|d|nd|rd|th)?",
"day-range-regex" : "(?:\d{1,2})(?:st|d|nd|rd|th)?\s*[-–—]\s*(?:\d{1,2})(?:st|d|nd|rd|th)?",
"year-regex" : "(?:\d{4})",
@WaxCylinderRevival
WaxCylinderRevival / analyze-string-for-dates.xqm
Last active October 27, 2017 00:25
Analyze input for date patterns common in FRUS (Foreign Relations of the United States) documents, using fn:analyze-string
(: declare namespace dp='https://history.state.gov/ns/xquery/date-processing' :)
declare function local:find-date-strings
( $textWithDates as element()? ) as element()* {
let $textString :=
$textWithDates/node()[not(self::note)] => string-join(' ') => normalize-space() => analyze-string('((\d{1,2}(d|nd|rd|st|th)*\s+(January|February|March|April|May|June|July|August|September|October|November|December),*\s+\d{4})|((January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2}(d|nd|rd|st|th)*,*\s+\d{4}))','i')
for $match in data($textString/fn:match)
@WaxCylinderRevival
WaxCylinderRevival / find-date-string.xqm
Last active October 18, 2017 18:54
Test of a local function that finds date strings matching a common FRUS date pattern and wraps each one in a date element
import module namespace functx="http://www.functx.com" at "http://www.xqueryfunctions.com/xq/functx-1.0-nodoc-2007-01.xq";
(: declare namespace date-processing='https://history.state.gov/ns/xquery/dates' :)
declare function local:find-date-strings
( $textWithDates as xs:string? ) as element()* {
for $textString in
$textWithDates => data() => serialize() => normalize-space() => functx:get-matches('((\d{1,2}[(st)(d)(nd)(rd)(th)]*\s+(January|February|March|April|May|June|July|August|September|October|November|December),*\s+\d{4})|((January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2}[(st)(nd)(d)(rd)(th)]*,\s+\d{4}))') => serialize() => functx:trim() => tokenize('\s\s+')