Created
October 18, 2017 16:37
-
-
Save Dudemanguy/e4da2676accecad955341377119c6cfc to your computer and use it in GitHub Desktop.
Revisions
-
Dudemanguy created this gist
Oct 18, 2017 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,156 @@ library(HTSFilter) library(edgeR) suppressPackageStartupMessages(library(DESeq2)) options(max.width=9999999, width=10000) #define functions #user input print("Script for performing differential gene expression between two groups") count_raw <- readline("Enter the file containing the count matrix \n") count_matrix <- read.table(count_raw) sample_query <- readline("Press 1 to enter samples manually. Press 2 to read from a file (each entry on a separate line) \n") #inputting sample groups; provide manual option and reading from a file if (sample_query==1){ columns_to_keep <- readline("Enter the desired samples as numbers separated by a comma prefaced by a lowercase h (e.g. h001,h002,h003,etc.) \n") sample_split <- strsplit(columns_to_keep, ",") count_subset <- subset(count_matrix, select=eval(parse(text=sample_split))) #ask for condition entry condition_query2 <- readline("Press 1 to enter the condition manually. Press 2 to read from a file (each entry on a separate line) \n") if (condition_query2==1){ define_group <- readline("Enter the condition for each sample separated by commas (e.g. 1,1,2,2). Note that the order must be the same as the samples. \n") group_split <- strsplit(define_group, ",") group <- unlist(group_split) group <- factor(group, levels=unique(unlist(group_split))) } else if (condition_query2==2){ define_group <- readline("Enter the file name containing the condition \n") group <- readLines(define_group) group <- factor(group, levels=unique(readLines(define_group))) } } #sample entry if (sample_query==2){ sample_query2 <- readline("Press 1 if the file only contains the desired samples. Press 2 if the file also defines the condition (tab delimited) \n") if (sample_query2==1){ print("File only contains sample information") sample_file <- readline("Enter the file name \n") columns_to_keep <- readLines(sample_file) count_subset <- subset(count_matrix, select = eval(parse(text=list(columns_to_keep)))) condition_query <- readline("Press 1 to enter the condition manually. Press 2 to read from a file (each entry on a separate line) \n") #ask for condition entry if (condition_query==1){ define_group <- readline("Enter the condition for each sample separated by commas (e.g. 1,1,2,2). Note that the order must be the same as the samples. \n") group_split <- strsplit(define_group, ",") group <- unlist(group_split) group <- factor(group, levels=unique(unlist(group_split))) } else if (condition_query==2){ define_group <- readline("Enter the file name containing the condition \n") group <- read.table(define_group) group <- factor(group, levels=unique(read.table(define_group))) } } else if (sample_query==2){ print("File contains sample and condition information") complete_file <- readline("Enter the file name \n") complete_table <- read.table(complete_file) columns_to_keep <- as.vector(complete_table[,1]) count_subset <- subset(count_matrix, select = eval(parse(text=list(columns_to_keep)))) group <- complete_table[,2] group <- factor(group, levels=unique(complete_table[,2])) } } #error handling if (exists("count_subset")==FALSE){ print("Error. No count matrix found. Retry entry.") stop() } #get the number of samples in the first group group_table <- table(group) n <- group_table[[1]][1] #output directory for results dir_output <- readline("Enter the name of the output directory \n") dir.create(dir_output) #apply HTSFilter filter <- HTSFilter(count_subset, group, s.min=1, s.max=200, s.len=25) count_filter <- filter$filteredData #apply edgeR y <- DGEList(counts=count_filter, group=group) y <- calcNormFactors(y) y <- estimateDisp(y) et <- exactTest(y) #apply DESeq2 group_dataframe <- data.frame(group) dds <- DESeqDataSetFromMatrix(countData=count_filter, colData=group_dataframe, design=~group) dds <- DESeq(dds) res <- results(dds) #create data frame for edgeR results et_results <- topTags(et, n=Inf, sort.by="none") write.table(et_results, file="et_output", sep="\t") et_table <- read.table("et_output") et_table <- et_table[-(2)] file.remove("et_output") #create data frame for DESeq2 results resOrdered <- res[order(res$padj),] write.table(resOrdered, file="DESeq_output", sep="\t") res_order <- read.table("DESeq_output") file.remove("DESeq_output") #grab counts, average them, add to data frame, and sort a <- count_filter[,1:n] b <- count_filter[,(n+1):ncol(count_filter)] c <- data.frame(rowMeans(a)) d <- data.frame(rowMeans(b)) et_table["Avg Ct A"] <- c et_table["Avg Ct B"] <- d et_table <- et_table[,c(4,5,1,2,3)] et_table <- et_table[order(et_table$FDR, decreasing=FALSE),] res_order["Avg Ct A"] <- c res_order["Avg Ct B"] <- d res_order <- res_order[,c(7,8,1,2,3,4,5,6)] #print results and save to file setwd(dir_output) dir.create("edgeR") setwd("edgeR") sink("dgelist_output") print(y) sink() write.table(et_table, file="full_results", sep="\t", quote=FALSE) et_sig <- et_table[which(et_table$FDR<0.05),] write.table(et_sig, file="significant_results", sep="\t", quote=FALSE) setwd("..") dir.create("DESeq2") setwd("DESeq2") sink("deseq2_summary") print(summary(res)) sink() write.table(res_order, file="full_results", sep="\t", quote=FALSE) res_sig <- res_order[which(res_order$padj<0.05),] write.table(res_sig, file="significant_results", sep="\t", quote=FALSE) setwd("..") setwd("..")