Skip to content

Instantly share code, notes, and snippets.

@RichMorin
Created June 13, 2014 23:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RichMorin/736e2e9014b41a3629ef to your computer and use it in GitHub Desktop.
Save RichMorin/736e2e9014b41a3629ef to your computer and use it in GitHub Desktop.
redo_nodes - redo node sets into TSV data rows
dbpr_!Kheis_Local_Municipality ||
dbpr_!Women_Art_Revolution ||
dbpr_"A,"_My_Name_is_Alex_-_Parts_I_&_II ||
dbpr_"Awesome" ||
dbpr_"Banksy_of_Bulgaria" ||
dbpr_"Big"_Brian_Subich ||
dbpr_"Big"_Paul_Williams ||
dbpr_"C"_Is_for_(Please_Insert_Sophomoric_Genitalia_Reference_Here) ||
dbpr_"D"_Is_for_Dubby_–_The_Lustmord_Dub_Mixes ||
dbpr_"Dor-en-Ernil" ||
dbpr_"Go_Away" ||
dbpr_"Gypsy"_in_Jazz ||
dbpr_"Happy"_in_Galoshes ||
dbpr_"In"_Jazz_for_the_Culture_Set ||
dbpr_"Irish"_Teddy_Mann ||
dbpr_"Isis"_of_the_Suebi ||
dbpr_"King_Ernest"_Baker ||
dbpr_"North_Shore_Railroad_(California) ||
dbpr_"Oh_Yeah!"_Live ||
dbpr_"R"_word ||
dbpr_"Ridgeriders"_In_Concert ||
dbpr_"Rommel?"_"Gunner_Who?" ||
dbpr_"Singles" ||
dbpr_"Southern_New_Jersey_Railroad" ||
dbpr_"Sunshine"_Sonny_Payne ||
dbpr_"The_Above_Ground_Sound"_of_Jake_Holmes ||
dbpr_"The_Spaghetti_Incident?" ||
dbpr_"U"_Is_for_Undertow ||
dbpr_"Uncle_Tom's_Cabin"_Contrasted_with_Buckingham_Hall,_the_Planter's_Home ||
dbpr_"Unplugged"_Live ||
dbpr_"V"_Is_for_Vengeance ||
dbpr_"Weird_Al"_Yankovic ||
dbpr_"Weird_Al"_Yankovic_Live!_–_The_Alpocalypse_Tour ||
dbpr_"Welding"_Kumar ||
dbpr_"Wesleyan_Methodist_College" ||
dbpr_"i" ||
dbpr_$O$ ||
dbpr_$_(Mark_Sultan_album) ||
dbpr_$h*!_My_Dad_Says ||
dbpr_$uga(r) ||
dbpr_&_I_Made_A_Man ||
dbpr_&_Then_Boom ||
dbpr_''Same_Team''_Fallacy ||
dbpr_'A_morte_'e_Carnevale ||
dbpr_'Abd_Allah_ibn_'Amr_ibn_al-'As ||
dbpr_'Abd_al-Razzaq_al-Hasani ||
dbpr_'Abd_as-Sattar_Qasm ||
dbpr_'Ajde_Jano ||
dbpr_'Ali-Sultan ||
dbpr_'Ali_ibn_al-Husayn_ibn_Quraysh ||
dbpr_'Amanave ||
dbpr_'Amr_III_ibn_al-Mundhir ||
dbpr_'Amr_ibn_Adi ||
dbpr_'Amr_ibn_Imru'_al-Qays ||
dbpr_'Amr_ibn_al-'As ||
dbpr_'Ana_Po'uhila ||
dbpr_'Anin ||
dbpr_'Aoa ||
dbpr_'Arab_al-Jahalin ||
dbpr_'Arab_al-Rashayida ||
dbpr_'Au'asi ||
dbpr_'Azazme ||
dbpr_'Aziz_'Ali_al-Misri ||
dbpr_'Bout_Changes_'n'_Things ||
dbpr_'Bout_Love ||
dbpr_'Bout_Soul ||
dbpr_'Deed_I_Do ||
dbpr_'Disco'_La_Passione ||
dbpr_'Elisiva_Fusipala_Vaha'i ||
dbpr_'Em_Are_I ||
dbpr_'F'_Debut ||
dbpr_'Galway_Joe'_Dolan ||
dbpr_'How's_my_driving?'_sign ||
dbpr_'Ili'ili ||
dbpr_'It's_Alive!' ||
dbpr_'Makholane ||
dbpr_'Malakeng ||
dbpr_'Mamants'O ||
dbpr_'Moteng ||
dbpr_'N_Sync ||
dbpr_'N_Sync_in_Concert ||
dbpr_'Neath_Austral_Skies ||
dbpr_'O'ua ||
dbpr_'Ole_language ||
dbpr_'Op_o'_Me_Thumb ||
dbpr_'Orfi_Shirazi ||
dbpr_'Oro ||
dbpr_'Ota_'ika ||
dbpr_'Punnagai_Poo'_Gheetha ||
dbpr_'R_Xmas ||
dbpr_'Round_About_Midnight_at_the_Cafe_Bohemia ||
dbpr_'Round_Here ||
dbpr_'SUP_Magazine ||
dbpr_'S_Wonderful_(album) ||
dbpr_'Splosion_Man ||
dbpr_'The_All-Species_Living_Tree'_Project ||
dbpr_'The_Masterwork'_Award_Winning_Fish-Knife ||
dbpr_'Til_Death_Do_Us_Party ||
dbpr_'Til_I_Can_Make_It_on_My_Own ||
dbpr_'Til_I_Gain_Control_Again ||
#!/usr/bin/env julia
#
# redo_nodes - redo node sets into TSV data rows
#
# (WIP) Transliteration of redo_nodes (and bi_prep.rb) into Julia.
#
# Pkg.add("YAML")
#
# Written by Rich Morin, CFCL, 2014
require("YAML")
function from_repl()
    #
    # REPL entry point: fill the global input/output settings with fixed
    # defaults, then hand control to redo_nodes().

    global inp_dir, inp_file, out_dir, out_file

    println("from_repl") #T

    inp_dir  = "."
    inp_file = "oT.t2"
    out_dir  = "."
    out_file = "oT.t2j"

    redo_nodes()
end
function from_shell()
    #
    # Command-line entry point (eg, via the "shebang line" at the top of
    # the file).
    #
    # Expects exactly four arguments in ARGS:
    #
    #   <inp_dir> <inp_file> <out_dir> <out_file>
    #
    # These are stored in the globals of the same names before redo_nodes()
    # is called. With any other argument count, a usage message is printed
    # and the process exits with a non-zero status.

    global inp_dir, inp_file, out_dir, out_file

    println("from_shell") #T

    if length(ARGS) != 4
        arg_list = "<inp_dir> <inp_file> <out_dir> <out_file>"
        @printf("Usage: redo_nodes %s\n", arg_list)
        # A bad invocation is a failure; a bare exit() would report success.
        exit(1)
    end

    inp_dir, inp_file, out_dir, out_file = ARGS
    redo_nodes()
end
function redo_nodes()
    #
    # Top-level driver: prepare the run-wide globals, then process the
    # node file.
    #
    # Informal call tree:
    #
    #   redo_nodes
    #   |  setup_run
    #   |  |  get_pred_info
    #   |  do_node_file
    #   |  |  do_node_set
    #   |  |  |  line_get
    #   |  |  |  line_unget

    println("redo_nodes") #T

    setup_run()
    return do_node_file()
end
function do_node_file()
    #
    # Walk the input node file, emitting merged TSV records.
    #
    # Opens "<inp_dir>/<inp_file>" for reading and "<out_dir>/<out_file>"
    # for writing, stores both streams in globals (f_nodes_inp and
    # f_nodes_out) for use by line_get() and do_node_set(), and then calls
    # do_node_set() until it returns "" (no more input).
    #
    # Both streams are closed on the way out, even if processing throws,
    # so that buffered output is flushed and no handles are leaked.

    global f_nodes_inp, f_nodes_out, inp_dir, inp_file, out_dir, out_file

    println("do_node_file") #T

    path_inp = "$( inp_dir )/$( inp_file )"
    path_out = "$( out_dir )/$( out_file )"

    f_nodes_inp = open(path_inp)
    try
        f_nodes_out = open(path_out, "w")
        try
            # Loop while input has something to offer.
            while (node = do_node_set() ) != ""
                if false #T
                    println("\nnode: $( node )") #T
                end
            end
        finally
            close(f_nodes_out)
        end
    finally
        close(f_nodes_inp)
    end
end
function do_node_set()
    #
    # Handle the next node set: one node line followed by zero or more
    # property lines. Fields on a line are separated by re_split.
    #
    #   node line:  <subject> || 
    #   prop line:  <subject> || <predicate> || <object>
    #
    # One record is written to f_nodes_out, joined with fs_tsv:
    #
    #   <subject>, <namespace>, <name>, then one value per key in prop_list
    #   (empty when the node set had no such predicate).
    #
    # Returns the subject of the node set, or "" when the input is exhausted.

    global parm_hash, prop_list, re_split

    parm_hash = Dict{Any,Any}()
    re_trim   = r"^'([^']+)'.*$"

    # Get first line of node set. Blank lines are skipped here: a blank
    # subject would otherwise be returned as "", which the caller reads as
    # end of input. line_get() yields a non-empty marker at end of file,
    # so this loop always terminates.
    line = line_get()
    while line == ""
        line = line_get()
    end
    (line == "___EOF___ || ") && return ""
    ns_name = replace(line, re_split, s -> "")

    while true
        line      = line_get()
        split_arr = split(line, re_split)
        subj      = split_arr[1]

        if length(split_arr) > 2    # Prop line: process and save info.
            pred, obj = split_arr[2], split_arr[3]

            if (subj != ns_name) #T
                println("Warning: '$( subj )' != '$( ns_name )'")
                @printf("sizes: %d, %d\n", length(subj), length(ns_name))
            end

            # Keep only the text inside a leading '...' pair, if present.
            m = match(re_trim, obj)
            parm_hash[pred] = (m === nothing) ? obj : m.captures[1]

        else                        # Node line: emit results and return.
            # This line starts the next node set; push it back.
            line_unget(line)

            # Split "<namespace>_<name>" at the first underscore. A subject
            # without an underscore gets an empty name instead of failing.
            name_parts = split(ns_name, '_', 2)
            ns         = name_parts[1]
            name       = (length(name_parts) > 1) ? name_parts[2] : ""

            data_list = map(key -> get(parm_hash, key, ""), prop_list)
            full_list = vcat([ ns_name, ns, name ], data_list)
            @printf(f_nodes_out, "%s\n", join(full_list, fs_tsv))

            return ns_name
        end
    end
end
function get_pred_info()
    #
    # Get predicate information from the YAML file.
    #
    # Reads predicates.yaml from the directory that holds this source file
    # and looks at its "Predicates" mapping, where each key maps to a list:
    #
    #   [1]  "P" if the predicate is wanted as a property
    #   [2]  two-character type code (see prop_types, below)
    #   [3]  "I" if the predicate should be indexed
    #   [4]  explanatory text
    #
    # Sets the global prop_list to the sorted keys whose first item is "P".

    global prop_list

    println("get_pred_info") #T

    # Type code => type name. Built by assignment, so that the table is a
    # Dict{Any,Any} like the other tables in this file.
    prop_types = Dict{Any,Any}()
    for (code, type_name) in (
        ("__", "string"),
        ("bo", "boolean"),
        ("by", "byte"),
        ("ch", "char"),
        ("do", "double"),   # We use this for all floating point values.
        ("fl", "float"),
        ("in", "int"),      # We use this for all integer values.
        ("lo", "long"),
        ("sh", "short"),
    )
        prop_types[code] = type_name
    end

    raw_path  = string(@__FILE__, "/../predicates.yaml")
    yaml_path = normpath(raw_path)

    # The do-block form closes the file once it has been loaded.
    yaml_data = open(yaml_path) do io
        YAML.load(io)
    end

    pred_hash = yaml_data["Predicates"]
    pred_info = Dict{Any,Any}()
    pred_keys = sort( collect( keys(pred_hash) ) )
    prop_list = filter(k -> pred_hash[k][1] == "P", pred_keys)

    # pred_info is only consumed by the disabled trace below, but building
    # it also rejects (with a KeyError) any entry with an unknown type code.
    for key in pred_keys
        list = pred_hash[key]
        info = pred_info[key] = Dict{Any,Any}()

        info[:want_prop] = (list[1] == "P")
        info[:prop_type] = prop_types[ list[2] ]
        info[:index_me]  = (list[3] == "I")
        info[:exp_text]  = list[4]
    end

    if false #T
        @printf("pred_info: '%s'\n", pred_info)
        @printf("pred_keys: '%s'\n", pred_keys)
        @printf("prop_list: '%s'\n", prop_list)
#       @printf("yaml_data: '%s'\n", yaml_data)
    end
end
function line_get()
    #
    # Get a line from the nodes file.
    #
    # If a line has been pushed back into the global line_cache, that line
    # is returned and the cache is cleared. Otherwise, the next line is read
    # from the global stream f_nodes_inp, with its line ending removed.
    #
    # At end of file, the marker "___EOF___ || " is returned instead.
    #
    # Note: an empty line_cache means "nothing cached", so an empty line
    # cannot be pushed back.

    global f_nodes_inp, line_cache

    # The cache is assigned without a "::String" annotation: on the left of
    # an assignment that is a type declaration for the global, not a check.
    if line_cache != ""
        this_line  = line_cache
        line_cache = ""
        return this_line
    end

    if eof(f_nodes_inp)
        return "___EOF___ || "
    end

    return chomp( readline(f_nodes_inp) )
end
function line_unget(line)
    #
    # Unget a line from the nodes file.
    #
    # Stores line in the global line_cache, replacing any line already held
    # there, and returns it.
    #
    # The assignment carries no "::String" annotation: on the left of an
    # assignment that is a type declaration for the global, not a check.

    global line_cache

    line_cache = line
end
function setup_run()
    #
    # Set up assorted global variables for the run, then load the
    # predicate information.
    #
    #   fs_tmp      field separator string of the input lines
    #   re_split    regular expression matching that separator
    #   fs_tsv      field separator for the output records
    #   lin_lim     line limit; lower unless CVT_PROD is "Y" in the
    #               environment
    #   line_cache  one-line push-back buffer, initially empty
    #
    # These are plain global assignments: "const" is not applied here,
    # because these names are (re)bound from inside a function on each run.

    global fs_tmp, fs_tsv, lin_lim, line_cache, re_split

    println("setup_run") #T

    fs_tmp     = " || "
    re_split   = r" \|\| "
    line_cache = ""

    # Both run modes use a tab; "|" and " | " are handy when debugging.
    fs_tsv     = "\t"

    cvt_prod = get(ENV, "CVT_PROD", "")
    run_mode = (cvt_prod == "Y") ? :production : :debug
    lin_lim  = (run_mode == :debug) ? 1e4 : 1e9

    get_pred_info()
end
# Run the command-line entry point only when this file is executed as a
# script. In an interactive session the file can then be loaded without
# from_shell() printing its usage message and exiting, which leaves
# from_repl() available to call by hand.
if !isinteractive()
    from_shell()
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment