Skip to content

Instantly share code, notes, and snippets.

Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nitaku/9135a0f974d115e83e90 to your computer and use it in GitHub Desktop.
Save nitaku/9135a0f974d115e83e90 to your computer and use it in GitHub Desktop.
Manuscript Transcription Simple Syntax

This experiment shows the use of an Application-Specific Language defined to help users author a TEI representation of a manuscript. This version of MTSS (Manuscript Transcription Simple Syntax) supports line breaks, sentence splitting, abbreviation expansion and word hyphenation.

The editor, which provides syntax highlighting and highlights the current sentence, is built with CodeMirror.

// noprotect
# Register a CodeMirror simple mode named 'mtss' with two states: `start` and `w`.
# NOTE(review): the closing brackets of both rule lists and of the options object
# are not visible in this copy — it appears truncated; confirm against the original.
CodeMirror.defineSimpleMode('mtss', {
start: [
# '||' is tokenized as 'sentence'
{regex: new RegExp('\\|\\|'), token: 'sentence'},
# [abbr](expansion): six capture groups, each mapped to its own token name
{regex: new RegExp('(\\[)([^\\]]*)(\\])(\\()([^\\)]*)(\\))'), token: ['choice_square','choice_abbr','choice_square','choice_round','choice_expan','choice_round']},
# '{{' is tokenized as 'w' and switches to the `w` state
{regex: new RegExp('{{'), token: 'w', next: 'w'}
w: [
# '}}' is tokenized as 'w' and switches back to the `start` state
{regex: new RegExp('}}'), token: 'w', next: 'start'},
# any other single character (regex '.') in the `w` state is tokenized as 'w_content'
{regex: new RegExp('.'), token: 'w_content'}
# Create a CodeMirror editor from the element with id 'editor', using the 'mtss'
# mode with line numbers and line wrapping enabled.
# NOTE(review): the closing brace of the options object is not visible in this copy.
editor = CodeMirror.fromTextArea document.getElementById('editor'), {
mode: 'mtss',
lineNumbers: true,
lineWrapping: true
# TEI translation
# Re-run the translation each time the editor emits a 'change' event.
editor.on 'change', () -> update_code()
# update_code: builds a TEI-like markup string from the editor's current text.
# NOTE(review): indentation has been stripped from this copy, so the function body
# and the loop bodies below are not delimited — confirm against the original.
update_code = () ->
mtss = editor.getValue()
# opening sentence in the first folio
tei = '<s class="sentence">\n <lb/>' + mtss
# line break: every newline gets an <lb/> marker appended after it
.replace(new RegExp('\n','g'), '\n <lb/>')
# sentence end mark: each '||' closes the current <s> and opens a new one
.replace(new RegExp('\\|\\|','g'), '\n</s>\n<s class="sentence">')
# <w> tag: '{{' and '}}' become the opening and closing <w> tags
.replace(new RegExp('{{','g'), '<w>')
.replace(new RegExp('}}','g'), '</w>')
# <choice> tag: [abbr](expansion) becomes <choice><abbr>abbr</abbr><expan>expansion</expan></choice>
.replace(new RegExp('\\[([^\\]]*)\\]\\(([^\\)]*)\\)','g'), '<choice><abbr>$1</abbr><expan>$2</expan></choice>')
# closing sentence in the last folio
tei += '\n</s>'
# lb numbering
# replace() with a string pattern only rewrites the first match, so each pass
# numbers the next still-unnumbered <lb/>; at most 99 are numbered.
# presumably d3.format('02d') zero-pads the index to two digits — TODO confirm
for i in [1..99] # WARNING this is a dirty hack
tei = tei
.replace('<lb/>', "<lb n=\"#{d3.format('02d')(i)}\"/>")
# s numbering
# same first-match technique: each pass adds n="s_NN" to the next unnumbered <s>
for i in [1..99] # WARNING this is a dirty hack
tei = tei
.replace('<s class="sentence">', "<s class=\"sentence\" n=\"s_#{d3.format('02d')(i)}\">")
# NOTE(review): the next line looks garbled (unbalanced parenthesis, no call before
# the selector string); the expression that produced code_el appears to be missing.
code_el ='#code > code')
# update syntax highlighting
# NOTE(review): no highlighting statement follows in this copy — it appears to have been lost.
# Sentence highlighting
# current_sentence starts as null and is later assigned the value returned by editor.markText.
current_sentence = null
# When the editor emits 'cursorActivity', create a search cursor for '||' at the
# current cursor position and mark a range with the 'sentence_highlight' class.
editor.on 'cursorActivity', () ->
cursor = editor.getCursor()
search_cursor = editor.getSearchCursor('||', cursor)
# NOTE(review): the right-hand side of `from` is missing in this copy, and no
# find call on search_cursor is visible before its position is read.
from =
to = search_cursor.pos.from
# NOTE(review): the body of this `if` is not visible (indentation stripped, lines
# possibly dropped); presumably it clears the previous marker — TODO confirm.
if current_sentence?
current_sentence = editor.markText(from, to, {className: 'sentence_highlight'})
/* Stylesheet for the editor page.
   NOTE(review): closing braces are missing throughout this copy and at least one
   selector appears to have been dropped; confirm against the original. */
svg {
background: white;
/* NOTE(review): the two declarations below have no visible selector of their own. */
font-weight: bold;
color: #F70;
/* The .cm-* rules below presumably style the token names declared by the 'mtss'
   mode (choice_square, choice_round, w, w_content, choice_expan) — TODO confirm. */
.cm-choice_square, .cm-choice_round {
font-weight: bold;
color: #07F;
.cm-w {
font-weight: bold;
color: #092;
.cm-w_content {
color: #092;
.cm-choice_expan {
font-style: italic;
color: #777;
.cm-sentence-2 {
background: yellow;
/* #editor, .CodeMirror and #code each declare flex: 1; .CodeMirror and #code are
   both fixed at 500px high. */
#editor {
flex: 1;
.CodeMirror {
flex: 1;
height: 500px;
line-height: normal;
#code {
margin: 0;
border-left: 2px solid gray;
background: #EEE;
white-space: pre-wrap;
overflow-y: scroll;
height: 500px;
flex: 1;
/* body is a flex container laid out as a row, with vendor-prefixed fallbacks. */
body {
display: -webkit-box; /* OLD - iOS 6-, Safari 3.1-6 */
display: -moz-box; /* OLD - Firefox 19- (buggy but mostly works) */
display: -ms-flexbox; /* TWEENER - IE 10 */
display: -webkit-flex; /* NEW - Chrome */
display: flex; /* NEW, Spec - Opera 12.1, Firefox 20+ */
-ms-flex-flow: row;
-webkit-flex-flow: row;
flex-flow: row;
/* Class name passed to editor.markText by the script's cursorActivity handler. */
.sentence_highlight {
background: rgba(255,255,0,0.15);
<!DOCTYPE html>
<!-- Page hosting the MTSS editor (textarea#editor) and the output pane (pre#code). -->
<meta charset="utf-8">
<meta name="description" content="Manuscript Transcription Simple Syntax" />
<title>Manuscript Transcription Simple Syntax</title>
<!-- NOTE(review): most href/src values below are empty or just "//"; the URLs
     appear to have been stripped from this copy. Confirm against the original. -->
<link type="text/css" href="//" rel="stylesheet"/>
<link type="text/css" href="//" rel="stylesheet"/>
<link type="text/css" href="index.css" rel="stylesheet"/>
<script src=""></script>
<script src="//"></script>
<script src="//"></script>
<script src="//"></script>
<!-- Sample MTSS text; the script looks this element up by its id 'editor'. -->
<textarea id="editor">This is a sample text written in MTSS (Manuscript
Transcription Simple Syntax), a simple language that
can be automatically translated into TEI.||Sentences can be
terminated with double pipes.||Line breaks are simply
defined by inserting
newline characters.||A word that's splitted by a line break
can be marked by using two curly braces, as in this {{exam
ple}}.||Abbreviated words can be annotated with the
corresponding expansion by using a combination of square
and round brackets: [abbr.](expansion).||
Play with this code to see how the TEI is updated.</textarea>
<!-- Output pane; the script refers to it with the selector '#code > code'. -->
<pre id="code"><code class="xml"></code></pre>
<script src="//"></script>
<script src="index.js"></script>
(function() {
// noprotect
var current_sentence, editor, update_code;
// Register a CodeMirror simple mode named 'mtss' with two states: `start` and `w`.
// NOTE(review): several braces and brackets are not visible in this copy (for
// example the `{` that opens the first rule of each state, and the closers of
// both rule lists); it appears truncated. Confirm against the original.
CodeMirror.defineSimpleMode('mtss', {
start: [
// '||' is tokenized as 'sentence'
regex: new RegExp('\\|\\|'),
token: 'sentence'
}, {
// [abbr](expansion): six capture groups, each mapped to its own token name
regex: new RegExp('(\\[)([^\\]]*)(\\])(\\()([^\\)]*)(\\))'),
token: ['choice_square', 'choice_abbr', 'choice_square', 'choice_round', 'choice_expan', 'choice_round']
}, {
// '{{' is tokenized as 'w' and switches to the `w` state
regex: new RegExp('{{'),
token: 'w',
next: 'w'
w: [
// '}}' is tokenized as 'w' and switches back to the `start` state
regex: new RegExp('}}'),
token: 'w',
next: 'start'
}, {
// any other single character (regex '.') in the `w` state is tokenized as 'w_content'
regex: new RegExp('.'),
token: 'w_content'
// Create a CodeMirror editor from the element with id 'editor', using the 'mtss'
// mode with line numbers and line wrapping enabled.
// NOTE(review): the closing `});` of this call is not visible in this copy.
editor = CodeMirror.fromTextArea(document.getElementById('editor'), {
mode: 'mtss',
lineNumbers: true,
lineWrapping: true
// Re-run the translation each time the editor emits a 'change' event.
// NOTE(review): the closing `});` of this handler is not visible in this copy.
editor.on('change', function() {
return update_code();
// update_code: builds a TEI-like markup string from the editor's current text.
// NOTE(review): the closing braces of the loops and of the function are not
// visible in this copy; confirm against the original.
update_code = function() {
var code_el, i, mtss, tei, _i, _j;
mtss = editor.getValue();
// Prefix an opening <s> and <lb/>, then rewrite in order: newline -> newline + <lb/>,
// '||' -> close and reopen <s>, '{{' / '}}' -> <w> / </w>,
// [abbr](expansion) -> <choice><abbr>…</abbr><expan>…</expan></choice>.
tei = '<s class="sentence">\n <lb/>' + mtss.replace(new RegExp('\n', 'g'), '\n <lb/>').replace(new RegExp('\\|\\|', 'g'), '\n</s>\n<s class="sentence">').replace(new RegExp('{{', 'g'), '<w>').replace(new RegExp('}}', 'g'), '</w>').replace(new RegExp('\\[([^\\]]*)\\]\\(([^\\)]*)\\)', 'g'), '<choice><abbr>$1</abbr><expan>$2</expan></choice>');
// Close the last sentence.
tei += '\n</s>';
// Number the <lb/> markers: replace() with a string pattern rewrites only the
// first match, so each pass numbers the next unnumbered <lb/> (at most 99).
// presumably d3.format('02d') zero-pads the index to two digits — TODO confirm
for (i = _i = 1; _i <= 99; i = ++_i) {
tei = tei.replace('<lb/>', "<lb n=\"" + (d3.format('02d')(i)) + "\"/>");
// Same first-match technique: each pass adds n="s_NN" to the next unnumbered <s>.
for (i = _j = 1; _j <= 99; i = ++_j) {
tei = tei.replace('<s class="sentence">', "<s class=\"sentence\" n=\"s_" + (d3.format('02d')(i)) + "\">");
// NOTE(review): the next line looks garbled (unbalanced parenthesis, no call before
// the selector string), and no visible statement writes `tei` into code_el.
code_el ='#code > code');
// Run highlight.js on the DOM node returned by code_el.node().
return hljs.highlightBlock(code_el.node());
// current_sentence starts as null and is later assigned the value returned by editor.markText.
current_sentence = null;
// When the editor emits 'cursorActivity', create a search cursor for '||' at the
// current cursor position and mark a range with the 'sentence_highlight' class.
// NOTE(review): the closing braces of this handler and of the enclosing function
// wrapper are not visible in this copy.
editor.on('cursorActivity', function() {
var cursor, from, search_cursor, to;
cursor = editor.getCursor();
search_cursor = editor.getSearchCursor('||', cursor);
// NOTE(review): the right-hand side of `from` is missing in this copy, and no
// find call on search_cursor is visible before its position is read.
from =;
to = search_cursor.pos.from;
// NOTE(review): as shown, markText sits inside this `if`, so it would only run
// when a previous marker exists; the branch body (presumably clearing the old
// marker) appears to have been lost — TODO confirm against the original.
if (current_sentence != null) {
return current_sentence = editor.markText(from, to, {
className: 'sentence_highlight'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment