Created
November 11, 2015 09:24
-
-
Save gurix/10c675632798da04d9d2 to your computer and use it in GitHub Desktop.
Revisions
-
gurix created this gist
Nov 11, 2015 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,181 @@ "alpha" <- function(x,keys=NULL,cumulative=FALSE,title=NULL,max=10,na.rm=TRUE,check.keys=FALSE,n.iter=1,delete=TRUE,use="pairwise") { #find coefficient alpha given a data frame or a matrix alpha.1 <- function(C,R) { n <- dim(C)[2] alpha.raw <- (1- tr(C)/sum(C))*(n/(n-1)) alpha.std <- (1- n/sum(R))*(n/(n-1)) smc.R <- smc(R) G6 <- (1- (n-sum(smc.R))/sum(R)) av.r <- (sum(R)-n)/(n*(n-1)) sn <- n*av.r/(1-av.r) Q = (2 * n^2/((n-1)^2*(sum(C)^3))) * (sum(C) * (tr(C^2) + (tr(C))^2) - 2*(tr(C) * sum(C^2))) result <- list(raw=alpha.raw,std=alpha.std,G6=G6,av.r=av.r,sn=sn,Q=Q) return(result) } #begin main function cl <- match.call() if(!is.matrix(x) && !is.data.frame(x)) stop('Data must either be a data frame or a matrix') nvar <- dim(x)[2] nsub <- dim(x)[1] scores <- NULL response.freq <- NULL if (nsub !=nvar) { item.var <- apply(x,2,sd,na.rm=na.rm) bad <- which((item.var <= 0)|is.na(item.var)) if((length(bad) > 0) && delete) { for (baddy in 1:length(bad)) {warning( "Item = ",colnames(x)[bad][baddy], " had no variance and was deleted")} x <- x[,-bad] nvar <- nvar - length(bad) } response.freq <- response.frequencies(x,max=max) C <- cov(x,use=use)} else {C <- x} p1 <- principal(x) if(any(p1$loadings < 0)) { if (check.keys) { warning("Some items were negatively correlated with total scale and were automatically reversed.\n This is indicated by a negative sign for the variable name.") keys <- 1- 2* (p1$loadings < 0 ) } else { if(is.null(keys)) { warning(paste("Some items (",rownames(p1$loadings)[(p1$loadings < 0)],") were negatively correlated with the total scale and probably should be reversed. To do this, run the function again with the 'check.keys=TRUE' option")) } } } if(is.null(keys)) {keys <- rep(1,nvar)} else { keys<- as.vector(keys) if(length(keys) < nvar) { temp <- keys # this is the option of keying just the reversals keys <- rep(1,nvar) names(keys) <- colnames(x) keys[temp] <- -1 } } key.d <- diag(keys) C <- key.d %*% C %*% key.d signkey <- strtrim(keys,1) signkey[signkey=="1"] <- "" colnames(x) <- paste(colnames(x),signkey,sep="") if (nsub !=nvar) { #raw data if(any(keys < 0 )) { min.item <- min(x,na.rm=na.rm) max.item <- max(x,na.rm=na.rm) adjust <- max.item + min.item flip.these <- which(keys < 0 ) x[,flip.these] <- adjust - x[,flip.these] } if(cumulative) { total <- rowSums(x,na.rm=na.rm) } else { total <- rowMeans(x,na.rm=na.rm) } mean.t <- mean(total,na.rm=na.rm) sdev <- sd(total,na.rm=na.rm) raw.r <- cor(total,x,use=use) t.valid <- colSums(!is.na(x)) } else { #we are working with a correlation matrix total <- NULL totals <- TRUE } R <- cov2cor(C) drop.item <- vector("list",nvar) alpha.total <- alpha.1(C,R) if(nvar > 2) { for (i in 1:nvar) { drop.item[[i]] <- alpha.1(C[-i,-i,drop=FALSE],R[-i,-i,drop=FALSE]) } } else { drop.item[[1]] <- drop.item[[2]] <- c(rep(R[1,2],2),smc(R)[1],R[1,2],NA,NA)} by.item <- data.frame(matrix(unlist(drop.item),ncol=6,byrow=TRUE)) if(nsub > nvar) { by.item[6] <- sqrt(by.item[6]/nsub) colnames(by.item) <- c("raw_alpha","std.alpha","G6(smc)","average_r","S/N","alpha se") } else { by.item <- by.item[-6] colnames(by.item) <- c("raw_alpha","std.alpha","G6(smc)","average_r","S/N") } rownames(by.item) <- colnames(x) Vt <- sum(R) item.r <- colSums(R)/sqrt(Vt) #this is standardized r #correct for item overlap by using smc RC <-R diag(RC) <-smc(R) Vtc <- sum(RC) item.rc <-colSums(RC)/sqrt(Vtc) #yet one more way to correct is to correlate item with rest of scale if(nvar > 1) { r.drop <- rep(0,nvar) for (i in 1:nvar) { v.drop <- sum(C[-i,-i,drop=FALSE]) c.drop <- sum(C[,i]) - C[i,i] r.drop[i] <- c.drop/sqrt(C[i,i]*v.drop) } } item.means <- colMeans(x, na.rm=na.rm ) item.sd <- apply(x,2,sd,na.rm=na.rm) if(nsub > nvar) { ase = sqrt(alpha.total$Q/nsub) alpha.total <- data.frame(alpha.total[1:5],ase=ase,mean=mean.t,sd=sdev) colnames(alpha.total) <- c("raw_alpha","std.alpha","G6(smc)","average_r","S/N","ase","mean","sd") rownames(alpha.total) <- "" stats <- data.frame(n=t.valid,raw.r=t(raw.r),std.r =item.r,r.cor = item.rc,r.drop = r.drop,mean=item.means,sd=item.sd) } else { alpha.total <- data.frame(alpha.total[-6]) #fixed 27/7/14 colnames(alpha.total) <- c("raw_alpha","std.alpha","G6(smc)" ,"average_r","S/N") rownames(alpha.total) <- "" stats <- data.frame(r =item.r,r.cor = item.rc,r.drop = r.drop) #added r.drop 10/12/13 } rownames(stats) <- colnames(x) if(n.iter > 1) { # do a bootstrap confidence interval for alpha # if(!require(parallel)) {message("The parallel package needs to be installed to run mclapply")} if(nsub == nvar) { message("bootstrapped confidence intervals require raw data") boot <- NULL boot.ci <- NULL } else { boot <- vector("list",n.iter) boot <- mclapply(1:n.iter,function(XX) { xi <- x[sample.int(nsub,replace=TRUE),] C <- cov(xi,use="pairwise") if(!is.null(keys)) { key.d <- diag(keys) xi <- key.d %*% C %*% key.d } R <- cov2cor(C) alpha.1(C,R) }) #end of mclapply boot <- matrix(unlist(boot),ncol=6,byrow=TRUE) colnames(boot) <- c("raw_alpha","std.alpha","G6(smc)","average_r","s/n","ase") boot.ci <- quantile(boot[,1],c(.025,.5,.975)) } } else { boot=NULL boot.ci <- NULL } result <- list(total=alpha.total,alpha.drop=by.item,item.stats=stats,response.freq=response.freq,keys=keys,scores = total,nvar=nvar,boot.ci=boot.ci,boot=boot,call=cl,title=title) class(result) <- c("psych","alpha") return(result) } #modified Sept 8, 2010 to add r.drop feature #modified October 12, 2011 to add apply to the sd function #modified November 2, 2010 to use sd instead of SD #January 30, 2011 - added the max category parameter (max) #June 20, 2011 -- revised to add the check.keys option as suggested by Jeremy Miles #Oct 3, 2013 check for variables with no variance and drop them with a warning #November 22, 2013 Added the standard error as suggested by #modified December 6, 2013 to add empirical confidence estimates #modified January 9, 2014 to add multicore capabilities to the bootstrap #corrected December 18 to allow reverse keying for correlation matrices as well as raw data #modified 1/16/14 to add S/N to summary stats #added item.c (raw correlation) 1/10/15