# --- # Simuations for the example in section 2 of the paper. # This code is for the S-Plus programming system. # Functions are modularized into small tasks to generate the data, # do the t-test, estimate the parameters, derive the corrected p-values, # and summarize the set of them. # The centerpiece is the call corP(genData()) that corrects the p-value # of a generated data set. # Note: we set a specific (pseudo) random seed, such that re-running # the simulation will produce identical results. # --- # Generation of simulated data. # Arguments: threshold, number of multivariate Normal random numbers, # mean, standard deviation, and correlation. genData <- function(thr = 0, N = 100, mn = c(0,0), sd = c(1,1), rho = 0.7) { val <- array(rnorm(N * 2), c(2, N)) covFact <- diag(sd) %*% t(chol(array(c(1, rho, rho, 1), c(2, 2))) ) val <- covFact %*% val + mn val <- t(val) ind <- val[,1] > thr val <- data.frame(val) names(val) <- c("x0", "x1") attr(val, "ind") <- ind attr(val, "thr") <- thr val } # --- # Carrying out a paired t-test for equality of means. # The data set must be a list with two components, x0 and x1. # The function returns the p-value of the test. tTest <- function(data, sub = T) { # paired t-test on full data or subset if (sub) data <- data[attr(data, "ind"),] t.test(data$x0 - data$x1)$p.value } # --- # Estimation of mean and covariance of a multivariate Normal # (two-dimensional in our application). # The input data set is assumed to be a matrix with 2 columns. # The function returns a list containing mean, std dev, and correlation. estPars <- function(data) { mn <- apply(data, 2, mean) cv <- var(data) mn <- rep(mean(mn), 2) sd <- rep(mean(diag(cv)), 2) rho <- cv[1,2]/sqrt(cv[1,1]*cv[2,2]) list(mn = mn, sd = sd, rho = rho) } # --- # Correcting the p-value: # This function conducts nSim simulations to correct an observed p-value. # The uncorrected p-value is calculated here, and the corrected p-value # is returned by this function. corP <- function(data, nSim = 100) { pv0 <- tTest(data) est <- estPars(data) aux <- double(nSim) thr <- attr(data, "thr") N <- nrow(data) for(i in 1:nSim) { aux[i] <- tTest(genData(thr, N, est$mn, est$sd, est$rho)) } val <- mean(aux <= pv0) attr(val, "pv0") <- pv0 val } # --- # This code carries out the simulations (!). # Note that up to here we have only declarations of functions, # but nothing gets executed. # The following executes the simulations. # Probably the only thing you want to modify is the value of nsim. # On a standard PC, 1,000 simulations take a good half hour. # random number initialization seed <- c(61, 11, 51, 37, 16, 2, 39, 18, 12, 29, 31, 2) set.seed(seed) nsim <- 1000 ## Comparing p-values for full data, subset unadjusted and subset adjusted pVal <- array(0, c(nsim, 3), list(1:nsim, c("Full", "Obs", "Adj"))) date() cat("Running", nsim, "simulations:\n") for(i in 1:nsim) { if (i %% 50 == 0) cat(i, " ") dat <- genData() aux <- corP(dat) pVal[i, ] <- c(tTest(dat, sub = F), attr(aux, "pv0"), aux) } # --- # the central result: unadjusted and adjusted p-values pVal <- data.frame(pVal) # --- # Create graphs - need to use the line below in R, but not in S-PLUS #library(lattice) trellis.device(color = F) # create data sets suitable for grouped histograms # Figure 1 pdata1 <- data.frame(pval=c(pVal$Full, pVal$Obs), group=factor(rep(c("Full data", "x0 > 0"), each = nrow(pVal)), levels = c("x0 > 0", "Full data"))) # histogram of p-values histogram(~pmax(pval, 0.00001)| group, pdata1, xlab = "p-values", # main = "Distribution of p-values", breaks=seq(0, 1, by=0.05), layout=c(1, 2), # 2 rows of one graph each strip=function(...) strip.default(..., style=1), # take out that bar in the strip par.strip.text=list(cex=1.2)) # larger characters in strips # Figure 2 pdata2 <- data.frame(pval=c(pVal$Obs, pVal$Adj), group=factor(rep(c("Observed", "Adjusted"), each = nrow(pVal)), levels = c("Adjusted", "Observed"))) # histogram of p-values histogram(~pmax(pval, 0.00001)| group, pdata2, xlab = "p-values", breaks=seq(0, 1, by=0.05), layout=c(1, 2), # 2 rows of one graph each strip=function(...) strip.default(..., style=1), # take out that bar in the strip par.strip.text=list(cex=1.2)) # larger characters in strips # Figure 3 xyplot(Adj ~ Obs, pVal, xlab = "Observed p-value", ylab = "Adjusted p-value", aspect=1, # scales=list(limits=c(-0.05, 1.05)), # for R scales=list(limits=c(0, 1)), # for S-PLUS panel = function(x, y, ...) { panel.xyplot(x, y, ...) if(length(x) > 50) panel.loess(x, y, span = 0.3, col = 1, lwd = 3) } ) # --- # Finally show the central result in numbers: # Fraction of significant calls before and after adjustment # (mean of the 0/1 variable whether or not each p-value is below 0.05) sig.calls <- c( "observed"=mean(pVal$Obs <= 0.05), "adjusted"=mean(pVal$Adj <= 0.05) ) cat("Fraction of significant calls before and after correction:\n") print(sig.calls) # --- # The End. # The full data, observed and adjusted p-values are in pVal, # the summary statistics (fraction of significant calls) in sig.calls, # and the graphs are stored in graphs.