Skip to content

Instantly share code, notes, and snippets.

@martijn
Created June 8, 2022 18:42
Show Gist options
  • Save martijn/07d0029025229054137fa5c6622d09c1 to your computer and use it in GitHub Desktop.
Save martijn/07d0029025229054137fa5c6622d09c1 to your computer and use it in GitHub Desktop.
require "compress/zip"
# Minimal streaming (SAX-style) reader for XML-like markup.
#
# The input is consumed from an `IO` by alternately reading up to the next `<`
# (a text run) and up to the next `>` (a tag). Subclasses receive the pieces
# through `characters`, `start_tag` and `end_tag`.
#
# Known limitations: no well-formedness checking, no entity decoding, and no
# special handling of comments, CDATA sections or processing instructions
# (e.g. `<?xml ... ?>` is reported through `start_tag` with the name `?xml`).
abstract class SaxParser
  # Matches one attribute inside a tag.
  #
  # * group 1 - attribute name; any run of characters other than whitespace,
  #   `=`, `/`, quotes and angle brackets, so prefixed names such as `xmlns:r`
  #   or `xml:space` are kept whole
  # * group 2 - value when it is double-quoted
  # * group 3 - value when it is single-quoted
  #
  # The `m` flag lets a value span line breaks.
  ATTR_REGEX = /([^\s=\/"'<>]+)\s*=\s*(?:"(.*?)"|'(.*?)')/m

  # *xml* is the stream to read; it is consumed by `parse!`.
  def initialize(xml : IO)
    @xml = xml
  end

  # Reads the stream to EOF, invoking the callbacks in document order.
  def parse!
    loop do
      chars = @xml.gets('<', true)
      break unless chars # EOF reached

      characters(chars) unless chars.empty?

      # A nil here means EOF in the middle of a tag; the next iteration
      # then terminates the loop.
      if tag = @xml.gets('>', true)
        dispatch_tag(tag)
      end
    end
  end

  # Routes the text found between `<` and `>` to the matching callback(s).
  private def dispatch_tag(tag : String)
    if tag.starts_with?('/')
      # Whitespace is permitted before the `>` of a closing tag.
      end_tag(tag[1..].rstrip)
    else
      # Drop the self-closing marker before splitting so that `<b/>` is
      # reported as "b" rather than "b/".
      self_closing = tag.ends_with?('/')
      body = self_closing ? tag.rchop : tag

      # Any whitespace (space, tab, newline) may separate name and attributes.
      tag_name, _, attrs = body.partition(/\s+/)

      start_tag(tag_name, attrs.empty? ? nil : parse_attributes(attrs))
      end_tag(tag_name) if self_closing
    end
  end

  # Builds a name => value hash from the attribute portion of a tag.
  private def parse_attributes(attrs : String) : Hash(String, String)
    attrs.scan(ATTR_REGEX).each_with_object({} of String => String) do |match, hash|
      # Exactly one of the two value groups takes part in each match.
      hash[match[1]] = match[2]? || match[3]
    end
  end

  # Called for an opening (or self-closing) tag. *params* is nil when the tag
  # carries no attribute text.
  abstract def start_tag(tag : String, params : Hash(String, String)?)

  # Called for each non-empty text run found between tags.
  abstract def characters(chars : String)

  # Called for a closing tag, and right after `start_tag` for a self-closing one.
  abstract def end_tag(tag : String)
end
# Example `SaxParser` subclass that echoes every parse event to standard
# output as markup, one event per line.
class MyParser < SaxParser
  # Prints a text run as received.
  def characters(chars : String)
    puts chars
  end

  # Prints an opening tag, followed by its attributes when *params* is given.
  #
  # The parameter is named `params` to match the abstract definition in
  # `SaxParser`, so named-argument calls work the same on both.
  def start_tag(tag : String, params : Hash(String, String)?)
    if params
      attr_string = params.map { |k, v| "#{k}=\"#{v}\"" }.join(" ")
      puts "<#{tag} #{attr_string}>"
    else
      puts "<#{tag}>"
    end
  end

  # Prints a closing tag.
  def end_tag(tag : String)
    puts "</#{tag}>"
  end
end
# Stream the first worksheet of the workbook through MyParser.
# The block form of `open` closes the archive when the block finishes,
# whereas a bare `File.new` would leave the underlying file handle open.
Compress::Zip::File.open("./office365-xl7.xlsx") do |zip|
  # `[]` raises if the archive has no such entry.
  zip["xl/worksheets/sheet1.xml"].open do |xml|
    MyParser.new(xml).parse!
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment