Skip to content

Instantly share code, notes, and snippets.

@sxv
Created November 18, 2017 08:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sxv/012b3db4e679675a3a26e3bb43c64f51 to your computer and use it in GitHub Desktop.
Save sxv/012b3db4e679675a3a26e3bb43c64f51 to your computer and use it in GitHub Desktop.
# Pipeline steps: (1) make FASTQ files; (2) align; (3) make the expression table.
# usage:
# python 3reads.py /batch/dir1/ /batch/dir2/ # run all steps on two batches
# python 3reads.py -2 -3 -loadAndKeep /batch/dir # run steps 2 and 3 and use STAR --loadAndKeep
# python 3reads.py /batch/dir1 /batch/dir2/ -13 # run steps 1 and 3 on two batches
import sys, os
# Directory that holds the pipeline helper scripts and the results/ folder.
base_dir = '/home/sxv/code/s7s/'
# Helper scripts, one per pipeline step.
MAKE_FASTQ = '%s/make_fastq.sh' % base_dir
ALIGN = '%s/align_smart-3seq.sh' % base_dir
MAKE_EXPRESSION_TABLE = '%s/make_expression_table.R' % base_dir


def parse_args(argv):
    """Split command-line arguments into steps, aligner choice and batch dirs.

    Args:
        argv: the arguments after the script name.

    Returns:
        A ``(steps, load_and_keep, batch_dirs)`` tuple:
        ``steps`` is the concatenation of every step flag (``-13`` -> ``'13'``,
        ``-2 -3`` -> ``'23'``), or ``'123'`` when no step flag was given;
        ``load_and_keep`` is True when ``-loadAndKeep`` or ``-keep`` was given;
        ``batch_dirs`` lists the remaining arguments in order.
    """
    # None means "no step flag seen yet". Comparing against the default
    # string instead would misfire once flags accumulate to '123'
    # (e.g. "-12 -3 -o" would end up as just 'o').
    selected = None
    load_and_keep = False
    dirs = []
    for arg in argv:
        if not arg:
            # An empty argument is neither a flag nor a usable directory.
            continue
        if arg in ('-loadAndKeep', '-keep'):
            load_and_keep = True
        elif arg.startswith('-'):
            selected = (selected or '') + arg[1:]
        else:
            dirs.append(arg)
    return ('123' if selected is None else selected), load_and_keep, dirs


steps, load_and_keep, batch_dirs = parse_args(sys.argv[1:])
if load_and_keep:
    # Swap in the aligner script variant requested by -loadAndKeep / -keep.
    ALIGN = '%s/align_smart-3seq.loadAndKeep.sh' % base_dir
def make_fastq():
    """Step 1: run the make_fastq script in the current batch directory.

    Reads the module-level ``batch_dir`` and ``MAKE_FASTQ``. Copies the script
    into the batch directory, runs it there with ``.`` as its argument, then
    moves every ``*gz`` file into a ``fastq`` subdirectory.

    Paths are handed to bash as positional parameters rather than pasted into
    the command text, so directories containing spaces or quotes work. A
    command that exits non-zero is reported on stderr; the remaining commands
    still run.
    """
    import subprocess

    # Each entry: (bash script, positional parameters $1, $2, ...).
    commands = (
        ('cp "$1" "$2"', MAKE_FASTQ, batch_dir),
        ('cd "$2" && bash "$1" .', MAKE_FASTQ, batch_dir),
        ('cd "$1" && mkdir -p fastq && mv *gz fastq', batch_dir),
    )
    for command in commands:
        script, params = command[0], list(command[1:])
        # The extra 'bash' fills $0 so that params start at $1.
        status = subprocess.call(['bash', '-c', script, 'bash'] + params)
        if status != 0:
            sys.stderr.write('make_fastq: command exited with status %d: %s\n'
                             % (status, script))
def align():
    """Step 2: run the alignment script on the batch's fastq files.

    Reads the module-level ``batch_dir`` and ``ALIGN``. Copies the script into
    ``<batch_dir>/fastq/``, runs it there against the hg38 STAR index on every
    ``*fastq.gz`` file, then moves the ``*ba?`` and ``*log`` outputs into
    ``<batch_dir>/bam/``.

    Paths are handed to bash as positional parameters rather than pasted into
    the command text, so directories containing spaces or quotes work. A
    command that exits non-zero is reported on stderr; the remaining commands
    still run.
    """
    import subprocess

    # Each entry: (bash script, positional parameters $1, $2, ...).
    commands = (
        ('cp "$1" "$2/fastq/"', ALIGN, batch_dir),
        ('cd "$2/fastq" && bash "$1" '
         '/media/stroma-common/genome/hg38/star/dbsnp147_gencode25-68/ '
         '*fastq.gz', ALIGN, batch_dir),
        ('cd "$1" && mkdir -p bam && mv fastq/*ba? fastq/*log bam/',
         batch_dir),
    )
    for command in commands:
        script, params = command[0], list(command[1:])
        # The extra 'bash' fills $0 so that params start at $1.
        status = subprocess.call(['bash', '-c', script, 'bash'] + params)
        if status != 0:
            sys.stderr.write('align: command exited with status %d: %s\n'
                             % (status, script))
def make_expression_table():
    """Step 3: build the gene-expression table from the batch's bam files.

    Reads the module-level ``batch_dir``, ``MAKE_EXPRESSION_TABLE`` and
    ``base_dir``. Copies the R script into ``<batch_dir>/bam``, runs it there
    on every ``*bam`` file, converts sheet 1 of ``gene_expression.xlsx`` to
    ``raw_reads.csv``, and copies that CSV to ``<project>.raw_reads.csv`` both
    next to it and under ``<base_dir>results/``.

    Paths are handed to bash as positional parameters rather than pasted into
    the command text, so directories containing spaces or quotes work. A
    command that exits non-zero is reported on stderr; the remaining commands
    still run.
    """
    import subprocess

    # Name the project after the batch directory itself. abspath() drops
    # trailing or duplicate slashes and resolves '.', which a plain
    # split('/') turned into an empty name or '.'.
    project_name = os.path.basename(os.path.abspath(batch_dir))

    # Each entry: (bash script, positional parameters $1, $2, ...).
    commands = (
        ('cp "$1" "$2/bam"', MAKE_EXPRESSION_TABLE, batch_dir),
        ('cd "$2/bam" && Rscript "$1" --no-rlog '
         '/media/stroma-common/genome/hg38/gencode.v25.annotation.gtf *bam',
         MAKE_EXPRESSION_TABLE, batch_dir),
        ('cd "$1/bam" && xlsx2csv -s 1 gene_expression.xlsx raw_reads.csv',
         batch_dir),
        ('cd "$1/bam" && cp raw_reads.csv "${2}.raw_reads.csv" && '
         'cp raw_reads.csv "${3}results/${2}.raw_reads.csv"',
         batch_dir, project_name, base_dir),
    )
    for command in commands:
        script, params = command[0], list(command[1:])
        # The extra 'bash' fills $0 so that params start at $1.
        status = subprocess.call(['bash', '-c', script, 'bash'] + params)
        if status != 0:
            sys.stderr.write(
                'make_expression_table: command exited with status %d: %s\n'
                % (status, script))
# Step flag -> step function, in execution order.
# An optional step would be added here, e.g. ('o', optional_step).
pipeline = (
    ('1', make_fastq),
    ('2', align),
    ('3', make_expression_table),
)

# The loop variable must stay named batch_dir: the step functions read it
# as a module-level global.
for batch_dir in batch_dirs:
    for flag, run_step in pipeline:
        if flag in steps:
            run_step()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment