# hello Ute, long time no see. library(LFQBench2) library(data.table) library(readxl) library(stringr) library(rstatix) library(readr) # reading in the report R = read_wide_report('2019-107_115 Renauld remeasure_user designed 20200306-122426_quantification_report_UD_sent.xlsx', skip=1, sheet="TOP3 quantification") # header = read_excel("2019-107_115 Renauld remeasure_user designed 20200306-122426_quantification_report_UD_sent.xlsx", # sheet="TOP3 quantification" # "L1:", col_names = FALSE) PD = as.data.table(read_csv("projectDetails.csv")) colnames(PD) = make.names(colnames(PD)) exp = as.data.table(str_split_fixed(PD$replicate_name, ' ', 2)) colnames(exp) = c('state', 'replicate') PD = cbind(PD, exp) rm(exp) PD = PD[state != 'metastasis'] uteTtests = as.data.table(read_csv("ute_t_tests.csv")) # subselect columns: normal_cols = grep("normal ", names(R), value = TRUE) tumor_cols = grep("tumor ", names(R), value = TRUE) R_info = R[,description:entry] all(table(R_info$entry) == 1) # entry uniquely describes each protein and so is a key I_w = R[, c('entry', normal_cols, tumor_cols), with = FALSE] I = melt(I_w, id.vars='entry', variable.name ='exp', value.name = 'I') exp = as.data.table(str_split_fixed(I$exp, ' ', 2)) colnames(exp) = c('state', 'replicate') I = cbind(I, exp) rm(exp) I_small = I[entry %in% c('IGK_HUMAN', 'IGLC2_HUMAN', 'IGL1_HUMAN')] # redoing Ute's analysis I_small[, .(uteTtest=t.test(I~state, data=.SD)$p.value), entry] # this should be done with t.test(I~state+entry, data=I_small), but it does not work. table(I[, uniqueN(state), entry]$V1) pvalsUte = sapply(split(I, I$entry), function(x) try(t.test(data=x, I~state$pvalue))) is.error <- function(x) inherits(x, "try-error") robustTtest = function(data, formula){ r = try(t.test(formula, data), silent=TRUE) if(is.error(r)) return(-1) else return(r$p.value) } UteTest = function(data, formula){ r = try(t.test(formula, data), silent=TRUE, na.action) if(is.error(r)){ X = data[,.(NAcnt = sum(is.na(I))), state] if(any(X$NAcnt > 30)) return(0) else return(1) } else return(r$p.value) } res = I[,.(uteTtest=UteTest(.SD, I~state)), entry] res = uteTtests[res,on='entry'] res[, real_ute_t_test := ifelse(real_ute_t_test==9999,1,real_ute_t_test)] hist(res$real_ute_t_test - res$uteTtest, breaks=1000, main='Excel vs R', xlab='Excel.pvalue - R.pvalue') res[, uteTtestFDR := p.adjust(uteTtest, 'fdr')] hist(res$uteTtestFDR, breaks=100, xlab="Ute's T-test FDR adjusted", main='') I[, exp:=as.character(exp)] I = merge(I, PD[,.(replicate_name, Patient.No)], by.x = 'exp', by.y='replicate_name') bigTest = I[, .(t=UteTest(.SD, I~state)), .(entry, Patient.No)] bigTest[, tFDR:=p.adjust(t, 'fdr')] bigTest[,`:=`(pat_t = paste0('t_', Patient.No), pat_t_FDR=paste0('tFDR_', Patient.No) )] # hist(bigTest[uteTtest <= 1]$uteTtest, breaks=100) res = res[merge(dcast(bigTest, entry~pat_t, value.var='t'), dcast(bigTest, entry~pat_t_FDR, value.var='tFDR'), by='entry'), on='entry'] res = R_info[res, on='entry'] write.csv(res, file='results.csv')