Skip to content

Instantly share code, notes, and snippets.

@Dmitra
Last active August 12, 2016 06:33
Show Gist options
  • Save Dmitra/b76741090e9046588375 to your computer and use it in GitHub Desktop.
Save Dmitra/b76741090e9046588375 to your computer and use it in GitHub Desktop.
Convert indented text to GraphML
# Extension of the core File class used by the graph readers in this file.
class File
  # Collects every line from the current read position to end of file.
  #
  # @return [String] the concatenated lines, line terminators included;
  #   an empty string when nothing is left to read
  def to_string
    # File#each yields one line at a time; fold them into a single buffer.
    each_with_object(''.dup) { |line, buffer| buffer << line }
  end
end
# Directed graph of labelled nodes.
#
# A node is identified by the position of its label in the label list; an edge
# is stored as a [source_index, target_index] pair. Graphs can be loaded from
# yEd-style GraphML or from two-level indented text and written out as GraphML.
class Graph
  # Prefix marking a line of an indented text file as a child of the most
  # recent unindented line. Two characters, matching the two characters that
  # are stripped from a child line before it becomes a label.
  INDENT = (' ' * 2).freeze

  # Lines that open every document written by #to_xml. The <key> declaration
  # is a child of <graphml> and precedes <graph>, as the GraphML schema
  # requires.
  XML_HEADER = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">',
    '<key id="label" for="node" attr.name="label" attr.type="string"/>',
    '<graph edgedefault="directed">'
  ].freeze

  # Closing tags of every document written by #to_xml.
  XML_FOOTER = '</graph></graphml>'.freeze

  # Name of the graph; used to build the default output file name in #to_xml.
  attr_accessor :name

  # @param labels [Array] initial node labels; the array is used as-is, so
  #   later calls to #add append to the caller's array
  def initialize(labels = [])
    @name = 'graph'
    @edges = []
    @labels = labels
  end

  # Appends a node.
  #
  # @param label [Object] label of the new node
  # @return [Array] the label list
  def add(label)
    @labels << label
  end

  # Adds an edge between the first nodes carrying the given labels.
  # Nothing happens when either label is unknown or the edge already exists.
  #
  # @param source [Object] label of the source node
  # @param target [Object] label of the target node
  # @return [Array, nil] the edge list when an edge was added, otherwise nil
  def connect(source, target)
    connect_by_id(@labels.index(source), @labels.index(target))
  end

  # Adds an edge between two nodes given by index.
  # Nothing happens when either index is nil, not an Integer, outside the
  # label list, or when the edge already exists.
  #
  # @param s [Integer, nil] index of the source node
  # @param t [Integer, nil] index of the target node
  # @return [Array, nil] the edge list when an edge was added, otherwise nil
  def connect_by_id(s, t)
    return unless node_index?(s) && node_index?(t)

    edge = [s, t]
    @edges << edge unless @edges.include?(edge)
  end

  # Writes the labels, one per line, to +name+.
  #
  # @param name [String] path of the file to (over)write
  # @return [File] the file object, already closed
  def to_file(name)
    # Block form closes the handle, which guarantees the data is flushed.
    File.open(name, 'w') { |out| out << @labels.join("\n") }
  end

  # Writes the graph as GraphML: one <node> per label (id = index, label
  # stored in a <data key="label"> child) followed by one <edge> per edge.
  #
  # @param name [String] path of the file to (over)write; defaults to the
  #   graph name followed by ".xml"
  # @return [File] the file object, already closed
  def to_xml(name = (@name + '.xml'))
    lines = XML_HEADER.dup
    @labels.each_with_index do |label, i|
      # Labels are free text, so markup characters must be escaped.
      lines << "<node id=\"#{i}\"><data key=\"label\">#{escape_xml(label)}</data></node>"
    end
    @edges.each do |source, target|
      lines << "<edge source=\"#{source}\" target=\"#{target}\"></edge>"
    end
    lines << XML_FOOTER
    File.open(name, 'w') { |out| out << lines.join("\n") }
  end

  # Loads a graph from a file, choosing the parser by file extension:
  # ".graphml" for yEd-style GraphML, ".txt" for indented text.
  # The graph is named after the file's base name without extension.
  #
  # @param file [String] path of the file to read
  # @return [Graph]
  # @raise [ArgumentError] when the extension is neither ".graphml" nor ".txt"
  def self.read(file)
    extension = File.extname(file)
    graph = case extension
            when '.graphml' then read_graphML(File.read(file))
            when '.txt' then read_ws(File.read(file)) # whitespaced
            else
              raise ArgumentError, "unsupported graph file extension: #{extension.inspect}"
            end
    graph.name = File.basename(file, extension)
    graph
  end

  # Extracts the unique words of a "{word}{word}..." file and writes them,
  # one per line, to a file called "out" in the current directory.
  # Words of exactly one character are dropped.
  #
  # @param file [String] path of the file to read
  # @return [File] the "out" file object, already closed
  def self.extract_keywords(file)
    text = File.read(file)
    # Remove the last character and the first one (the outer braces), then
    # any whitespace that followed the opening brace.
    text.chop!
    text[0, 1] = ' '
    text.lstrip!
    # '}{' separates consecutive words.
    keywords = text.split(/\}\{/).uniq.reject { |word| word.size == 1 }
    File.open('out', 'w') { |out| out << keywords.join("\n") }
  end

  # Runs the external command "mystem.exe -l <file> out".
  # The arguments are passed as a list, so no shell is involved and file
  # names containing spaces or shell metacharacters are handled literally.
  #
  # @param file [String] path handed to mystem.exe
  # @return [String] whatever the command wrote to standard output
  def self.normalize(file)
    IO.popen(['mystem.exe', '-l', file.to_s, 'out'], &:read)
  end

  # Builds a graph from GraphML text as matched by the patterns below:
  # node ids come from <node id="...">, labels from the text that precedes a
  # closing </y:NodeLabel> tag on the same line, edges from <edge ...
  # source="..." target="...">. Labels containing "Folder" are discarded.
  # NOTE(review): discarding "Folder" labels presumably keeps the label list
  # aligned with the node id list for yEd group nodes - confirm against real
  # input. Ids and labels are paired by position.
  #
  # Besides the explicit edges, every node whose id contains "::" is linked
  # from the node whose id is the same path without its last segment.
  # Edges naming an unknown node are skipped.
  #
  # @param string [String] GraphML document
  # @return [Graph]
  def self.read_graphML(string)
    ids = string.scan(/<node id\="((\w+|\:{2})*)"/).map { |captures| captures[0] }
    labels = string.scan(/>(.*)<\/y:NodeLabel/)
                   .map { |captures| captures[0] }
                   .reject { |label| label.include?('Folder') }
    edges = string.scan(/<edge id=".+" source="((\w+|:{2})+)" target="((\w+|:{2})+)"/)
                  .map { |captures| [captures[0], captures[2]] }

    graph = new(labels)
    # Connect by position rather than by label so that nodes sharing a label
    # are not confused with each other.
    edges.each do |source, target|
      graph.connect_by_id(ids.index(source), ids.index(target))
    end
    ids.each_with_index do |id, i|
      path = id.split(/::/)
      next unless path.size > 1

      path.pop
      graph.connect_by_id(ids.index(path.join('::')), i)
    end
    graph
  end

  # Builds a graph from indented text. Every line becomes a node. A line that
  # starts with INDENT is a child: the prefix is removed from its label and
  # an edge is added from the most recent unindented line. Any other line
  # becomes the current parent. Both "\n" and "\r\n" line endings are
  # accepted. A progress line is printed to standard output for every node.
  #
  # @param string [String] indented text
  # @return [Graph]
  def self.read_ws(string)
    graph = new
    parent = ''
    parent_index = nil
    string.split(/\r?\n/).each_with_index do |line, index|
      if line.start_with?(INDENT)
        graph.add(line[INDENT.size..-1])
        # Every line is added, so the line index is also the node index.
        # parent_index is nil until a parent has been seen; the edge is
        # skipped in that case.
        graph.connect_by_id(parent_index, index)
        puts "#{parent} - #{index}"
      else
        graph.add(line)
        parent = line
        parent_index = index
        puts "#{parent} ->"
      end
    end
    graph
  end

  # A bare `private` does not apply to methods defined with `def self.`.
  private_class_method :read_graphML, :read_ws

  private

  # @param index [Object] candidate node index
  # @return [Boolean] true when +index+ addresses an existing label
  def node_index?(index)
    index.is_a?(Integer) && index >= 0 && index < @labels.size
  end

  # @param text [Object] value to embed as XML character data
  # @return [String] +text+ with "&", "<" and ">" replaced by entities
  def escape_xml(text)
    text.to_s.encode(xml: :text)
  end
end
# Script entry point: load the graph described by "input.txt" and write it as
# GraphML. to_xml is called without a file name, so its default is used.
graph = Graph.read("input.txt")
graph.to_xml
# Disabled alternative output: dump only the labels to 'out.graphml'.
#graph.to_file('out.graphml')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment