Skip to content

Instantly share code, notes, and snippets.

@sxv
Last active August 10, 2016 20:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sxv/3dbc1c561b1d15e60a35 to your computer and use it in GitHub Desktop.
Save sxv/3dbc1c561b1d15e60a35 to your computer and use it in GitHub Desktop.
import sys
from glob import glob
CASE_COL = 0              # column holding the case identifier
GENE_COL = 7              # column holding the gene name
SORT_COLS = (1, 2)        # columns that order the variants printed for a gene
DEFAULT_PATTERN = '*n0*'  # glob pattern used when no argument is given


def read_variants(path):
    """Load the tab-separated table at *path* and group its rows by gene.

    The first line is treated as a header and is never parsed as data.
    Every following line is one variant; the line terminator is removed
    before splitting so that a gene name in the last column does not carry
    a trailing newline.

    Blank lines are ignored.  Non-blank lines with fewer than
    ``GENE_COL + 1`` columns are skipped with a warning on stderr.

    Returns:
        ``(header, by_gene)`` where *header* is the raw first line and
        *by_gene* maps each gene name to a list of
        ``(case, sort_key, raw_line)`` tuples in file order.
    """
    by_gene = {}
    with open(path) as handle:
        header = handle.readline()
        # Data starts on line 2 because the header was consumed above.
        for lineno, line in enumerate(handle, 2):
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) <= GENE_COL:
                if line.strip():
                    sys.stderr.write(
                        "%s:%d: skipped, fewer than %d columns\n"
                        % (path, lineno, GENE_COL + 1))
                continue
            row = (fields[CASE_COL],
                   tuple(fields[i] for i in SORT_COLS),
                   line)
            by_gene.setdefault(fields[GENE_COL], []).append(row)
    return header, by_gene


def rank_genes(by_gene):
    """Return the gene names ordered by number of distinct cases, descending.

    The sort is stable, so genes with equal counts keep the iteration
    order of *by_gene*.
    """
    def distinct_cases(gene):
        return len(set(case for case, _, _ in by_gene[gene]))

    return sorted(by_gene, key=distinct_cases, reverse=True)


def describe_gene(gene, rows):
    """Build the report for one gene.

    Args:
        gene: the gene name.
        rows: the ``(case, sort_key, raw_line)`` tuples of that gene.

    Returns:
        ``(summary, body)``: *summary* is the one-line description and
        *body* is the gene's raw lines concatenated in ``SORT_COLS`` order.
    """
    variants_per_case = {}
    for case, _, _ in rows:
        variants_per_case[case] = variants_per_case.get(case, 0) + 1
    single = sum(1 for n in variants_per_case.values() if n == 1)

    summary = ("%s has %s variants from %s cases "
               "(%s cases have exactly 1 variant in this gene)"
               % (gene, len(rows), len(variants_per_case), single))

    # NOTE(review): the sort columns are compared as strings, exactly as
    # before; if they hold numbers the order is lexicographic - confirm
    # that this is intended.
    ordered = sorted(rows, key=lambda row: row[1])
    return summary, "".join(line for _, _, line in ordered)


def main(argv=None):
    """Print a per-gene variant report for the first file matching a pattern.

    Args:
        argv: argument list; defaults to ``sys.argv``.  ``argv[1]``, when
            present, is the glob pattern; otherwise ``DEFAULT_PATTERN``.

    Output: the header line, then for each gene (most distinct cases
    first) a summary line followed by that gene's rows.

    Raises:
        SystemExit: when no file matches the pattern.
    """
    if argv is None:
        argv = sys.argv
    pattern = argv[1] if len(argv) > 1 else DEFAULT_PATTERN

    matches = glob(pattern)
    if not matches:
        sys.exit("no file matches %r" % pattern)

    header, by_gene = read_variants(matches[0])
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(header)
    for gene in rank_genes(by_gene):
        summary, body = describe_gene(gene, by_gene[gene])
        print(summary)
        print(body)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment