Skip to content

Instantly share code, notes, and snippets.

@Normal-Tangerine8609
Last active September 11, 2023 10:30
Show Gist options
  • Save Normal-Tangerine8609/d9532d78c9a3afa31899b00e21feb45d to your computer and use it in GitHub Desktop.
Save Normal-Tangerine8609/d9532d78c9a3afa31899b00e21feb45d to your computer and use it in GitHub Desktop.
Simple Scriptable RSS Feed Parser (XML)
function parseXML(string) {
/*
* Part 1
*
* Parse the xml into a DOM-like json
*
* Input: <tag attribute="value">text node<inline>text node</inline>text node</tag>
*
* Output:
* {
* "name": "root",
* "children": [
* {
* "name": "tag",
* "attrs": {
* "attribute": "value"
* },
* "innerText": "text node text node",
* "children": [
* {
* "name": "inline",
* "attrs": {},
* "innerText": "text node",
* "children": []
* }
* ]
* }
* ]
* }
*
*/
// Root of xml
let main = {
isRoot: true,
name: "root",
children: []
}
// Node to add onto
let target = main
// Store symbols to go back to parent nodes
let goBack = {}
// Declare the parser
let parser = new XMLParser(string)
// Store the new node and switch targets when entering a new tag
parser.didStartElement = (name, attrs) => {
let backTo = Symbol()
goBack[backTo] = target
let newTarget = {
name,
attrs,
innerText: "",
children: [],
end: backTo
}
target.children.push(newTarget)
target = newTarget
}
// Go back to the parent node when entering a closing tag
parser.didEndElement = (name) => {
let sym = target.end
delete target.end
target = goBack[sym]
}
// Add the inner text to the node, if there are multiple text nodes, combine them with spaces
parser.foundCharacters = (text) => {
target.innerText +=
target.innerText === "" ? text.trim() : " " + text.trim()
}
// Throw error on invalid input
parser.parseErrorOccurred = () => {
console.warn(
"A parse error occurred, ensure the document is formatted properly."
)
}
// Parse and return the root
parser.parse()
// If something went wrong and there is no root node, throw an error
if (!main.isRoot) {
console.warn(
"A parse error occurred, ensure the document is formatted properly."
)
}
// Remove the isRoot key
delete main.isRoot
/*
* Part 2
*
* Manipulate the AST-like json into a simpler json without attributes and other less important items. This will not collect both tag and text nodes if they are in the same tag.
*
* Input:
* {
* "name": "root",
* "children": [
* {
* "name": "tag",
* "attrs": {
* "attribute": "value"
* },
* "innerText": "text node text node",
* "children": [
* {
* "name": "inline",
* "attrs": {},
* "innerText": "text node",
* "children": []
* }
* ]
* }
* ]
* }
*
* Output:
* {
* "tag": {
* "inline": "text node"
* }
* }
*
* Notes: you can chose to only use part 1 if you need the attributes or text and tag nodes in the same tag. Replace the `return traverse(main)` below to `return main`
*/
// Return the new simple JSON
return traverse(main)
// Function to change each node
function traverse(node) {
// Store the new node
let newNode = {}
// Repeat with all children nodes
for (let child of node.children) {
// traverse the child
let newChild = traverse(child)
// If there are no children of the child than it should become a text value
if (child.children.length === 0) {
newChild = child.innerText
}
// If the new node already has a key of the child's name it will become an array
if (newNode[child.name]) {
// If it is an array, push the new child
if (Array.isArray(newNode[child.name])) {
newNode[child.name].push(newChild)
} else {
// If it is not an array, change it into one
newNode[child.name] = [newNode[child.name], newChild]
}
} else {
// If it is not in the keys, set the child as a value in the new node
newNode[child.name] = newChild
}
}
// Return the new node
return newNode
}
}
@Normal-Tangerine8609
Copy link
Author

Example

Input

<root>
  <node>
    <text>text node</text>
    <details>text node</details>
    <key>value</key>
  </node>
  <list>
    <item>text node</item>
    <item>text node</item>
    <item><tag>text node</tag></item>
    <key>value</key>
  </list>
</root>

Output

{
  "root": {
    "node": {
      "text": "text node",
      "details": "text node",
      "key": "value"
    },
    "list": {
      "item": [
        "text node",
        "text node",
        {
          "tag": "text node"
        }
      ],
      "key": "value"
    }
  }
}

@Normal-Tangerine8609
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment