Skip to content

Instantly share code, notes, and snippets.

@WaxCylinderRevival
Last active October 27, 2017 00:25
Show Gist options
  • Save WaxCylinderRevival/2f89603c3946b7dff20b88b61cd289bb to your computer and use it in GitHub Desktop.
Save WaxCylinderRevival/2f89603c3946b7dff20b88b61cd289bb to your computer and use it in GitHub Desktop.
Analyze input for date patterns common to FRUS dates, using fn:analyze-string
(: declare namespace dp='https://history.state.gov/ns/xquery/date-processing' :)
(:~
 : Find "day month year" and "month day, year" date phrases in the text of an
 : element and return each one as a <date> element with an ISO-style @when value.
 :
 : Child <note> nodes of $textWithDates are skipped. The remaining child nodes
 : are joined with single spaces and whitespace-normalized before matching, so a
 : date that is split across inline markup (e.g. <hi>31st July</hi> 2015) is
 : still found as one phrase.
 :
 : Month names are matched case-insensitively, and the month number is resolved
 : case-insensitively as well, so "FEBRUARY 2, 1865" yields when="1865-02-02".
 :
 : No calendar validation is performed: a phrase such as "31 February 2015"
 : is reported with when="2015-02-31".
 :
 : @param $textWithDates element whose child nodes are scanned; may be empty
 : @return one <date when="YYYY-MM-DD">matched text</date> per date phrase found,
 :         in order of appearance; empty sequence if nothing matches
 :)
declare function local:find-date-strings
  ( $textWithDates as element()? ) as element()* {
  (: Lower-case month names; position in this list is the month number. :)
  let $monthNames :=
    ('january', 'february', 'march', 'april', 'may', 'june',
     'july', 'august', 'september', 'october', 'november', 'december')
  let $monthPattern :=
    '(January|February|March|April|May|June|July|August|September|October|November|December)'
  (: Either "<day>[suffix] <Month>[,] <year>" or "<Month> <day>[suffix][,] <year>". :)
  let $datePattern :=
    concat(
      '((\d{1,2}(d|nd|rd|st|th)*\s+', $monthPattern, ',*\s+\d{4})',
      '|',
      '(', $monthPattern, '\s+\d{1,2}(d|nd|rd|st|th)*,*\s+\d{4}))'
    )
  let $analyzed :=
    $textWithDates/node()[not(self::note)]
      => string-join(' ')
      => normalize-space()
      => analyze-string($datePattern, 'i')
  for $match in data($analyzed/fn:match)
  (: The year is always the trailing four digits of the matched phrase. :)
  let $year := analyze-string($match, '\d{4}$')/fn:match
  (: The outer match ignores case, so the month lookup must ignore case too;
     otherwise "JULY" or "july" would fall through to "error". :)
  let $monthName := lower-case(analyze-string($match, $monthPattern, 'i')/fn:match[1])
  let $monthIndex := index-of($monthNames, $monthName)
  let $month :=
    if (exists($monthIndex))
    then format-number($monthIndex[1], '00')
    else "error"
  (: In both accepted phrase shapes the day is the first run of 1-2 digits. :)
  let $day := analyze-string($match, '\d{1,2}')/fn:match[1]
  let $day2Digit := $day => format-number('00')
  let $when := concat($year, '-', $month, '-', $day2Digit)
  return <date when="{$when}">{$match}</date>
};
(: Demo driver: run the date finder over a sample heading that mixes inline
   markup, both supported date orders, and a <note> whose date must be ignored,
   then wrap whatever comes back in a <results> element. :)
let $sample :=
<head>The first date is <hi>February 2d, 1865</hi>. The next date is March 1, 2010. The <strong>third</strong> date is <hi rend="italic">31st July</hi> 2015.<note>This is a note with a date: September 1, 1929.</note></head>
return
<results>{ local:find-date-strings($sample) }</results>
@WaxCylinderRevival
Copy link
Author

Example

Input:

<head>The first date is <hi>February 2d, 1865</hi>. The next date is March 1, 2010.  The <strong>third</strong> date is <hi rend="italic">31st July</hi> 2015.<note>This is a note with a date: September 1, 1929.</note></head>

Results:

<results>
  <date when="1865-02-02">February 2d, 1865</date>
  <date when="2010-03-01">March 1, 2010</date>
  <date when="2015-07-31">31st July 2015</date>
</results>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment