Skip to content

Instantly share code, notes, and snippets.

@sxv
sxv / catan_boards.py
Last active May 14, 2018 17:51
Script to run simulations of Settlers of Catan board tile setups to find the average number of games played before encountering a duplicate arrangement.
import sys, random
# board is defined as list of 19 land tiles: Forest (4), Sheep (4), Wheat (4), Brick (3), Rocks (3), Desert (1)
def new_board(board=''):
    """Return ``board`` extended with the 19 land tiles in random order.

    Each tile is one letter: forest ``f`` (4), sheep ``s`` (4), wheat ``w``
    (4), brick ``b`` (3), rocks ``r`` (3) and desert ``d`` (1).

    Args:
        board: Prefix string the tile letters are appended to. Defaults to
            the empty string, giving a plain 19-character board.

    Returns:
        ``board`` followed by the 19 tile letters in a random arrangement.
    """
    available = list('ffffsssswwwwbbbrrrd')
    # A single shuffle already produces a uniformly random permutation, so
    # there is no need to reshuffle before drawing each tile.
    random.shuffle(available)
    return board + ''.join(available)
@sxv
sxv / combine_raw_reads.py
Created November 18, 2017 08:30
take ENS gene id and gene name and append col[2:] from input files
import sys
# Build the base table: one [first field, third field] row per line of
# gene_names.csv. Going by the gist description these are presumably the
# ENS gene id and the gene name -- confirm against the actual CSV.
rows = []
# Use a context manager so the file handle is closed as soon as the table
# has been read instead of being left open for the rest of the script.
with open('gene_names.csv') as gene_names:
    for line in gene_names:
        # Each line must hold exactly three comma-separated fields; any
        # other count raises ValueError on unpacking.
        a, b, c = line.rstrip('\n').split(',')
        rows.append([a, c])
for file in sys.argv[1:]:
for i, line in enumerate(open(file)):
@sxv
sxv / 3reads.py
Created November 18, 2017 08:29
make fastq; align; make expression table;
# usage:
# python 3reads.py /batch/dir1/ /batch/dir2/ # run all steps on two batches
# python 3reads.py -2 -3 -loadAndKeep /batch/dir # run steps 2 and 3 and use STAR --loadAndKeep
# python 3reads.py /batch/dir1 /batch/dir2/ -13 # run steps 1 and 3 on two batches
import sys, os
# Directory that holds the pipeline's helper scripts.
base_dir = '/home/sxv/code/s7s/'
# Full paths of the three helper scripts, each built by substituting
# base_dir into its template. base_dir already ends in '/', so every
# resulting path contains a doubled slash.
MAKE_FASTQ, ALIGN, MAKE_EXPRESSION_TABLE = (
    template % base_dir
    for template in (
        '%s/make_fastq.sh',
        '%s/align_smart-3seq.sh',
        '%s/make_expression_table.R',
    )
)
# For every line of /usr/share/dict/words: lowercase it and delete the vowels
# a, e, i, o, u to form a key, count how many lines produce that key, and
# remember the most recent original line for it. At the end, print
# "key original-line" for each key that was produced by exactly one line.
gawk '//{x=tolower($0); gsub(/[aeiou]/,"",x); a[x]++; b[x]=$0;}END{for(i in a){ if(a[i]==1){print i, b[i]} }}' /usr/share/dict/words
import sys
# Default option values. The flag parsing that follows this block starts to
# handle -w/--window, so these are the values used when no flag overrides them.
opts = {}
opts['window'] = 100000
opts['intrachrom'] = False
# Command-line arguments with the script name dropped.
args = sys.argv[1:]
for a, arg in enumerate(args):
if arg.startswith('-'):
if arg.startswith('-w') or arg.startswith('--window'):
@sxv
sxv / rna_runner.py
Last active October 11, 2016 20:53
# define input cases via master_key.txt
# for each run, align fastq -> bam
# if GATK specified, run GATK, variant calling, and annovar
# run matricizer2 on tab output
# todo: add R clustering scripts
## usage
# python rnarunner.py --input-dir /path/to/fastqs
## NOTES: input-dir must contain master_key.txt, input fastq.gz files, matricizer2.py, UCSC_GENE_NAME3.txt
# define input cases via master_key.txt
# for each run, align fastq -> bam
# if GATK specified, run GATK, variant calling, and annovar
# run matricizer2 on tab output
# todo: add R clustering scripts
## usage
# python rnarunner.py --input-dir /path/to/fastqs
## NOTES: input-dir must contain master_key.txt, input fastq.gz files, matricizer2.py, UCSC_GENE_NAME3.txt
@sxv
sxv / index.html
Created June 12, 2016 04:34
dots1
<canvas id="canvas"></canvas>
<script src="https://d3js.org/d3.v3.min.js"></script>
<script>
// Number of particles to generate (used as the length of the d3.range below).
var num = 2000;
// The <canvas> element with id "canvas".
var canvas = document.getElementById("canvas");
// Resize the canvas to the window's inner dimensions and keep those
// dimensions in width/height for later use.
var width = canvas.width = window.innerWidth;
var height = canvas.height = window.innerHeight;
// 2D rendering context of the canvas.
var ctx = canvas.getContext("2d");
var particles = d3.range(num).map(function(i) {
return [Math.round(width*Math.random()), Math.round(height*Math.random()), 2];
@sxv
sxv / genesort.py
Last active August 10, 2016 20:01
import sys
from glob import glob
# Glob pattern for the input file: first command-line argument if one was
# given, otherwise the default '*n0*'.
match = sys.argv[1] if len(sys.argv) > 1 else '*n0*'
# Take the first path matching the pattern; raises IndexError when nothing
# matches.
file = glob(match)[0]
# Starts empty; presumably filled while reading the file -- the code that
# does so is not visible in this excerpt.
counts = {}
with open(file) as f:
print f.readline()
@sxv
sxv / vcfgenie.sh
Last active August 10, 2016 20:01
#!/usr/bin/env bash
for f in *.vcf; do \
gawk 'BEGIN{FS="\t"; OFS="\t"; } # separator=tab
/CHROM/{
## for(i=0; i<NF; i++){ if($i=="FORMAT"){ samples=(NF-i) } } # todo
if($(NF-2)~/FORMAT/){
samples=2; # two samples?
if($(NF)~/[nN]$/ || $(NF)~/normal$/){ swap=1 } # is order t/n?
}