jaquol · March 17, 2020 18:00
diff --git a/.block b/.block
diff --git a/README.md b/README.md
diff --git a/code.R b/code.R
 # Run a two-condition differential expression analysis at the transcript level with sleuth.
 #
 # Arguments:
 #   condition1: reference condition, written as '<treatment>_<treatment_time>' (coded as 0)
 #   condition2: condition compared against the reference, same format (coded as 1)
 #
 # Reads the globals 'metadata' (columns 'sample_id', 'treatment', 'treatment_time'), 'SAMPLES',
 # 'ANALYSIS', 'assembly_version' and 'sequencing_type'.
 # Writes 2 tables under <ANALYSIS>/tables/ (normalised abundances and test results) and
 # returns the sleuth object after the LRT and Wald tests have been run.
 diff_exp_sleuth <- function(condition1, condition2) {

 	# re-format metadata to meet sleuth's required format
 	# later on, the 'sleuth_prep' function of sleuth will assign 0=untreated and 1=treated condition
 	# importantly, by default such function will set '0' to first alphabetical match in the values
 	# found in the 'condition' field so here we force that 0=condition1
 	tab_metadata <- metadata
 	rownames(tab_metadata) <- tab_metadata$sample_id

 	# code each sample as 0 (condition1), 1 (condition2) or -1 (any other condition);
 	# condition1 is assigned last so that it takes precedence, as in an if / else if chain
 	tags <- paste(tab_metadata[['treatment']], tab_metadata[['treatment_time']], sep = '_')
 	conditions <- rep(-1, length(tags))
 	conditions[tags == condition2] <- 1
 	conditions[tags == condition1] <- 0
 	tab_metadata$condition <- conditions

 	# fail early with a clear message instead of handing sleuth a one-sided (or empty) design
 	if (!any(conditions == 0) || !any(conditions == 1)) {
 		stop("no samples found in 'metadata' for at least one of the conditions: ",
 			condition1, ", ", condition2, call. = FALSE)
 	}

 	# subset samples to include only those from the 2 conditions compared
 	tab_metadata <- tab_metadata[conditions %in% c(0, 1), ]

 	# get paths to data (one kallisto output directory per sample)
 	tab_metadata$path <- paste0(SAMPLES, "/", tab_metadata$sample_id, "/quantifications/kallisto/", assembly_version, '/', sequencing_type)

 	# rename to meet sleuth's format requirements
 	tab_metadata <- tab_metadata[, c('sample_id', 'condition', 'path')]
 	names(tab_metadata)[1] <- 'sample'

 	# (1) load the kallisto processed data and set up a model with 'condition' as the explanatory variable
 	so <- sleuth_prep(tab_metadata, ~ condition)

 	# (2) estimate parameters for the sleuth response error measurement (full) model as responding to the 'condition' factor
 	so <- sleuth_fit(so)

 	# (3) create another (reduced) model where the expression is not dependent on any factor
 	so <- sleuth_fit(so, ~1, 'reduced')

 	# (4.1) run a likelihood ratio test (LRT) between the two models to see what transcripts appear
 	# to really be affected by the 'condition' factor
 	so <- sleuth_lrt(so, 'reduced', 'full')

 	# (4.2) run the Wald test (WT), a statistical test which:
 	# - is somewhat related to the LRT and is also used to test for differential expression
 	# - LRT is considered a better test than the WT but
 	# - WT is used because it generates the beta statistic, which approximates to the fold change in expression between
 	# the 2 conditions tested, which is typically reported in differential expression analysis
 	so <- sleuth_wt(so, 'condition')

 	# both output files share the same '<condition1>_<condition2>.tsv' suffix
 	condition1_name <- tolower(condition1)
 	condition2_name <- tolower(condition2)
 	comparison_suffix <- paste0(condition1_name, "_", condition2_name, ".tsv")

 	# export normalised abundance values
 	otab <- paste0(ANALYSIS, "/tables/normalized_abundance_transcript_level_sleuth_", comparison_suffix)
 	write.table(kallisto_table(so), otab, sep = "\t", quote = FALSE, row.names = FALSE)

 	# add beta (b), beta's standard error (se_b) and the mean expression in the samples (mean_obs)
 	# to the LRT results; base merge() has no 'on' argument (it was silently ignored), so the join
 	# keys are spelled out here: every column shared by the 2 tables, which is what merge() used by default
 	res_lrt <- sleuth_results(so, 'reduced:full', test_type = 'lrt')
 	res_wt <- sleuth_results(so, 'condition')
 	res_wt <- res_wt[, c('target_id', 'b', 'se_b', 'mean_obs')]
 	join_keys <- intersect(names(res_lrt), names(res_wt))
 	res <- merge(res_lrt, res_wt, by = join_keys, sort = FALSE)

 	# export
 	otab <- paste0(ANALYSIS, "/tables/differential_expression_analysis_transcript_level_sleuth_", comparison_suffix)
 	write.table(res, otab, sep = "\t", quote = FALSE, row.names = FALSE)

 	so

 }

 # compare each doxycycline time point against the untreated baseline; the tables are written
 # by each call, while 'sleuth_object' only keeps the sleuth object of the last comparison
 sleuth_object <- diff_exp_sleuth("Untreated_0", "Doxycycline_480")
 sleuth_object <- diff_exp_sleuth("Untreated_0", "Doxycycline_960")
	# Run a two-condition differential expression analysis at the transcript level with sleuth.
	#
	# Arguments:
	#   condition1: reference condition, written as '<treatment>_<treatment_time>' (coded as 0)
	#   condition2: condition compared against the reference, same format (coded as 1)
	#
	# Reads the globals 'metadata' (columns 'sample_id', 'treatment', 'treatment_time'), 'SAMPLES',
	# 'ANALYSIS', 'assembly_version' and 'sequencing_type'.
	# Writes 2 tables under <ANALYSIS>/tables/ (normalised abundances and test results) and
	# returns the sleuth object after the LRT and Wald tests have been run.
	diff_exp_sleuth <- function(condition1, condition2) {

		# re-format metadata to meet sleuth's required format
		# later on, the 'sleuth_prep' function of sleuth will assign 0=untreated and 1=treated condition
		# importantly, by default such function will set '0' to first alphabetical match in the values
		# found in the 'condition' field so here we force that 0=condition1
		tab_metadata <- metadata
		rownames(tab_metadata) <- tab_metadata$sample_id

		# code each sample as 0 (condition1), 1 (condition2) or -1 (any other condition);
		# condition1 is assigned last so that it takes precedence, as in an if / else if chain
		tags <- paste(tab_metadata[['treatment']], tab_metadata[['treatment_time']], sep = '_')
		conditions <- rep(-1, length(tags))
		conditions[tags == condition2] <- 1
		conditions[tags == condition1] <- 0
		tab_metadata$condition <- conditions

		# fail early with a clear message instead of handing sleuth a one-sided (or empty) design
		if (!any(conditions == 0) || !any(conditions == 1)) {
			stop("no samples found in 'metadata' for at least one of the conditions: ",
				condition1, ", ", condition2, call. = FALSE)
		}

		# subset samples to include only those from the 2 conditions compared
		# (the previous 'cond1 \| cond2' carried a stray backslash and did not parse)
		tab_metadata <- tab_metadata[conditions %in% c(0, 1), ]

		# get paths to data (one kallisto output directory per sample)
		tab_metadata$path <- paste0(SAMPLES, "/", tab_metadata$sample_id, "/quantifications/kallisto/", assembly_version, '/', sequencing_type)

		# rename to meet sleuth's format requirements
		tab_metadata <- tab_metadata[, c('sample_id', 'condition', 'path')]
		names(tab_metadata)[1] <- 'sample'

		# (1) load the kallisto processed data and set up a model with 'condition' as the explanatory variable
		so <- sleuth_prep(tab_metadata, ~ condition)

		# (2) estimate parameters for the sleuth response error measurement (full) model as responding to the 'condition' factor
		so <- sleuth_fit(so)

		# (3) create another (reduced) model where the expression is not dependent on any factor
		so <- sleuth_fit(so, ~1, 'reduced')

		# (4.1) run a likelihood ratio test (LRT) between the two models to see what transcripts appear
		# to really be affected by the 'condition' factor
		so <- sleuth_lrt(so, 'reduced', 'full')

		# (4.2) run the Wald test (WT), a statistical test which:
		# - is somewhat related to the LRT and is also used to test for differential expression
		# - LRT is considered a better test than the WT but
		# - WT is used because it generates the beta statistic, which approximates to the fold change in expression between
		# the 2 conditions tested, which is typically reported in differential expression analysis
		so <- sleuth_wt(so, 'condition')

		# both output files share the same '<condition1>_<condition2>.tsv' suffix
		condition1_name <- tolower(condition1)
		condition2_name <- tolower(condition2)
		comparison_suffix <- paste0(condition1_name, "_", condition2_name, ".tsv")

		# export normalised abundance values
		otab <- paste0(ANALYSIS, "/tables/normalized_abundance_transcript_level_sleuth_", comparison_suffix)
		write.table(kallisto_table(so), otab, sep = "\t", quote = FALSE, row.names = FALSE)

		# add beta (b), beta's standard error (se_b) and the mean expression in the samples (mean_obs)
		# to the LRT results; base merge() has no 'on' argument (it was silently ignored), so the join
		# keys are spelled out here: every column shared by the 2 tables, which is what merge() used by default
		res_lrt <- sleuth_results(so, 'reduced:full', test_type = 'lrt')
		res_wt <- sleuth_results(so, 'condition')
		res_wt <- res_wt[, c('target_id', 'b', 'se_b', 'mean_obs')]
		join_keys <- intersect(names(res_lrt), names(res_wt))
		res <- merge(res_lrt, res_wt, by = join_keys, sort = FALSE)

		# export
		otab <- paste0(ANALYSIS, "/tables/differential_expression_analysis_transcript_level_sleuth_", comparison_suffix)
		write.table(res, otab, sep = "\t", quote = FALSE, row.names = FALSE)

		so

	}

	# compare each doxycycline time point against the untreated baseline; the tables are written
	# by each call, while 'sleuth_object' only keeps the sleuth object of the last comparison
	sleuth_object <- diff_exp_sleuth("Untreated_0", "Doxycycline_480")
	sleuth_object <- diff_exp_sleuth("Untreated_0", "Doxycycline_960")