Skip to content

Instantly share code, notes, and snippets.

@jewel12
Created June 1, 2019 02:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jewel12/4714f3beed0c65aadecc98a6feb9285c to your computer and use it in GitHub Desktop.
Save jewel12/4714f3beed0c65aadecc98a6feb9285c to your computer and use it in GitHub Desktop.
(変な)おもしろコード見せ合い会
require 'json'
# Bigram table: probs[left][right] is the integer sampling weight of token
# +right+ directly following token +left+. Looking up an unknown left token
# creates (and stores) an empty successor hash.
probs = Hash.new { |h, k| h[k] = {} }
# Every non-blank STDIN line is expected to be one JSON object with the keys
# "l" (left token), "r" (right token) and "prob" (numeric weight).
STDIN.each_line do |line|
  line = line.strip
  # Tolerate blank lines instead of failing on a nil parse result.
  next if line.empty?

  # JSON.parse rather than JSON.load: JSON.load is documented as intended for
  # trusted input only, and plain data parsing is all that is needed here.
  row = JSON.parse(line)
  # Scale to an integer weight; values below 0.001 truncate to a weight of 0.
  probs[row['l']][row['r']] = (row['prob'] * 1000).to_i
end
# Generates one random token sequence by walking the bigram table +probs+.
#
# The walk starts at the '@@@START@@@' sentinel. On every step a successor of
# the current token is drawn with probability proportional to its weight:
# * drawing '@@@END@@@' finishes the sequence;
# * drawing a token that has no successors of its own is discarded and another
#   draw is made from the same distribution;
# * any other token is appended to the result and becomes the current token.
#
# The +probs+ table is only read; no keys are inserted into it, so both plain
# hashes and hashes with a default block are accepted.
#
# @param probs [Hash{String => Hash{String => Integer}}] successor weights,
#   keyed by left token and then by right token (weights must be >= 0)
# @param max_steps [Integer] upper bound on the number of draws, which keeps
#   the walk finite even when the table never reaches '@@@END@@@'
# @return [Array<String>] the generated tokens, without the two sentinels
def gen(probs, max_steps: 10_000)
  no_successors = {}.freeze
  successors = probs.fetch('@@@START@@@', no_successors)
  generated = []

  max_steps.times do
    total = successors.sum { |_, weight| weight }
    # Nothing can be drawn (empty table or only zero weights): stop early.
    break unless total.positive?

    # Weighted pick without materialising one array element per weight unit:
    # walk the entries until the cumulative weight exceeds the random ticket.
    ticket = rand(total)
    candidate, = successors.find { |_, weight| (ticket -= weight).negative? }

    # Test for the END sentinel before the dead-end check below: END normally
    # has no successors itself and would otherwise be skipped as a dead end.
    break if candidate == '@@@END@@@'

    next_successors = probs.fetch(candidate, no_successors)
    # Dead end: drop this candidate and draw again from the same distribution.
    next if next_successors.empty?

    generated << candidate
    successors = next_successors
  end

  generated
end
# Endlessly generates candidate programs and evaluates each one. A candidate
# is printed (after a separator line) only when evaluating it did not raise.
loop do
  begin
    # '___ENT___' is the placeholder used for newlines in the token stream.
    code = gen(probs).map { |g| g.gsub('___ENT___', "\n") }.join(' ')
    # WARNING: this executes arbitrary generated code, so it should only be run
    # in a disposable environment that you do not mind breaking.
    eval(code)
    puts "-----------------------------"
    puts code
  rescue SignalException
    # Interrupt (Ctrl-C) and termination signals must stay able to stop this
    # otherwise endless loop, so they are passed through.
    raise
  rescue Exception
    # Deliberately broad: generated code mostly fails with SyntaxError and
    # similar errors that are not StandardError; such candidates are discarded.
  end
end
-- pairs(c): splits source text into space-separated tokens and returns every
-- adjacent (left, right) token pair, bracketed by the '@@@START@@@' and
-- '@@@END@@@' sentinels. Runs of newlines are replaced by the '___ENT___'
-- placeholder and runs of spaces are collapsed to one space before splitting.
CREATE TEMP FUNCTION pairs(c STRING)
RETURNS ARRAY<STRUCT<l STRING, r STRING>>
LANGUAGE js AS """
  const tokens = c
    .replace(RegExp('\\n+', 'g'), '___ENT___')
    .replace(RegExp(' +', 'g'), ' ')
    .split(' ');

  const bigrams = [];
  let prev = '@@@START@@@';
  for (const tok of tokens) {
    bigrams.push({l: prev, r: tok});
    prev = tok;
  }
  // Close the sequence so the consumer knows where a file ends.
  bigrams.push({l: prev, r: '@@@END@@@'});
  return bigrams;
""";
-- Builds the bigram table consumed by the generator: one output row per
-- (l, r) token pair that occurs at least 5 times, with a relative weight.
WITH
  -- Token pairs of every sampled file whose path ends with '.rb'.
  token_pairs AS (
    SELECT
      pairs(c.content) AS ps
    FROM
      `bigquery-public-data.github_repos.sample_contents` AS c
    INNER JOIN
      `bigquery-public-data.github_repos.sample_files` AS f
    ON
      c.id = f.id
    WHERE
      c.content IS NOT NULL
      AND ENDS_WITH(f.path, '.rb')
  ),
  -- Occurrence count per (l, r) pair; pairs seen fewer than 5 times are dropped.
  token_freqs AS (
    SELECT
      token.l AS l,
      token.r AS r,
      COUNT(1) AS freq
    FROM
      token_pairs
    CROSS JOIN
      UNNEST(token_pairs.ps) AS token
    GROUP BY
      l,
      r
    HAVING
      COUNT(1) >= 5
  ),
  -- Number of distinct surviving right tokens per left token.
  left_freqs AS (
    SELECT
      l,
      COUNT(1) AS freq
    FROM
      token_freqs
    GROUP BY
      l
  )
-- NOTE(review): the divisor is the count of distinct successors of l, not the
-- summed frequency, so `prob` is a relative weight among the successors of l
-- rather than a probability normalised to 1.
SELECT
  tf.l,
  tf.r,
  tf.freq / lf.freq AS prob
FROM
  token_freqs AS tf
INNER JOIN
  left_freqs AS lf
ON
  tf.l = lf.l
ORDER BY
  tf.l
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment