Dudemanguy · October 18, 2017 16:37 · Oct 18, 2017
diff --git a/diffgene_script.R b/diffgene_script.R
@@ -0,0 +1,156 @@
+library(HTSFilter)
+library(edgeR)
+suppressPackageStartupMessages(library(DESeq2))
+options(max.width=9999999, width=10000)
+
+#define functions
+
+#user input
+print("Script for performing differential gene expression between two groups")
+count_raw <- readline("Enter the file containing the count matrix \n")
+count_matrix <- read.table(count_raw)
+sample_query <- readline("Press 1 to enter samples manually. Press 2 to read from a file (each entry on a separate line) \n")
+
+#inputting sample groups; provide manual option and reading from a file
+if (sample_query==1){
+	columns_to_keep <- readline("Enter the desired samples as numbers separated by a comma prefaced by a lowercase h (e.g. h001,h002,h003,etc.) \n")
+	sample_split <- strsplit(columns_to_keep, ",")
+	count_subset <- subset(count_matrix, select=eval(parse(text=sample_split)))
+
+	#ask for condition entry
+	condition_query2 <- readline("Press 1 to enter the condition manually. Press 2 to read from a file (each entry on a separate line) \n")
+	if (condition_query2==1){
+		define_group <- readline("Enter the condition for each sample separated by commas (e.g. 1,1,2,2). Note that the order must be the same as the samples. \n")
+		group_split <- strsplit(define_group, ",")
+		group <- unlist(group_split)
+		group <- factor(group, levels=unique(unlist(group_split)))
+	}
+
+	else if (condition_query2==2){
+		define_group <- readline("Enter the file name containing the condition \n")
+		group <- readLines(define_group)
+		group <- factor(group, levels=unique(readLines(define_group)))
+	}
+}
+
+#sample entry
+if (sample_query==2){
+	sample_query2 <- readline("Press 1 if the file only contains the desired samples. Press 2 if the file also defines the condition (tab delimited) \n")
+	if (sample_query2==1){
+		print("File only contains sample information")
+		sample_file <- readline("Enter the file name \n") 
+		columns_to_keep <- readLines(sample_file)
+		count_subset <- subset(count_matrix, select = eval(parse(text=list(columns_to_keep))))
+		condition_query <- readline("Press 1 to enter the condition manually. Press 2 to read from a file (each entry on a separate line) \n")
+
+		#ask for condition entry
+			if (condition_query==1){
+				define_group <- readline("Enter the condition for each sample separated by commas (e.g. 1,1,2,2). Note that the order must be the same as the samples. \n")
+				group_split <- strsplit(define_group, ",")
+				group <- unlist(group_split)
+				group <- factor(group, levels=unique(unlist(group_split)))
+			}
+
+			else if (condition_query==2){
+				define_group <- readline("Enter the file name containing the condition \n")
+				group <- read.table(define_group)
+				group <- factor(group, levels=unique(read.table(define_group)))
+			}
+	}
+
+	else if (sample_query==2){
+		print("File contains sample and condition information")
+		complete_file <- readline("Enter the file name \n") 
+		complete_table <- read.table(complete_file) 
+		columns_to_keep <- as.vector(complete_table[,1])
+		count_subset <- subset(count_matrix, select = eval(parse(text=list(columns_to_keep))))
+		group <- complete_table[,2]
+		group <- factor(group, levels=unique(complete_table[,2]))
+	}
+}
+
+
+#error handling
+
+if (exists("count_subset")==FALSE){
+	print("Error. No count matrix found. Retry entry.")
+	stop()
+}
+
+#get the number of samples in the first group
+group_table <- table(group)
+n <- group_table[[1]][1]
+
+#output directory for results
+
+dir_output <- readline("Enter the name of the output directory \n")
+dir.create(dir_output) 
+
+#apply HTSFilter
+filter <- HTSFilter(count_subset, group, s.min=1, s.max=200, s.len=25)
+count_filter <- filter$filteredData
+
+#apply edgeR
+y <- DGEList(counts=count_filter, group=group)
+y <- calcNormFactors(y)
+y <- estimateDisp(y)
+et <- exactTest(y)
+
+#apply DESeq2
+group_dataframe <- data.frame(group)
+dds <- DESeqDataSetFromMatrix(countData=count_filter, colData=group_dataframe, design=~group)
+dds <- DESeq(dds)
+res <- results(dds)
+
+#create data frame for edgeR results
+et_results <- topTags(et, n=Inf, sort.by="none")
+write.table(et_results, file="et_output", sep="\t")
+et_table <- read.table("et_output")
+et_table <- et_table[-(2)]
+file.remove("et_output")
+
+#create data frame for DESeq2 results
+resOrdered <- res[order(res$padj),]
+write.table(resOrdered, file="DESeq_output", sep="\t")
+res_order <- read.table("DESeq_output")
+file.remove("DESeq_output")
+
+#grab counts, average them, add to data frame, and sort
+
+a <- count_filter[,1:n]
+b <- count_filter[,(n+1):ncol(count_filter)]
+c <- data.frame(rowMeans(a))
+d <- data.frame(rowMeans(b))
+et_table["Avg Ct A"] <- c
+et_table["Avg Ct B"] <- d
+et_table <- et_table[,c(4,5,1,2,3)]
+et_table <- et_table[order(et_table$FDR, decreasing=FALSE),]
+
+res_order["Avg Ct A"] <- c
+res_order["Avg Ct B"] <- d
+res_order <- res_order[,c(7,8,1,2,3,4,5,6)]
+
+#print results and save to file
+
+setwd(dir_output)
+
+dir.create("edgeR")
+setwd("edgeR")
+sink("dgelist_output")
+print(y)
+sink()
+write.table(et_table, file="full_results", sep="\t", quote=FALSE)
+et_sig <- et_table[which(et_table$FDR<0.05),]
+write.table(et_sig, file="significant_results", sep="\t", quote=FALSE)
+setwd("..")
+
+dir.create("DESeq2")
+setwd("DESeq2")
+sink("deseq2_summary")
+print(summary(res))
+sink()
+write.table(res_order, file="full_results", sep="\t", quote=FALSE)
+res_sig <- res_order[which(res_order$padj<0.05),]
+write.table(res_sig, file="significant_results", sep="\t", quote=FALSE)
+setwd("..")
+setwd("..")
No results found