Suppose a research team is interested in assessing the effectiveness of a new treatment called Magic Therapy (MT) on mental health which is administered across 50 clinics in North America. In each clinic, a group of 10 patients was randomly selected to begin MT, while another group of 10 served as the control group and continued with traditional therapy. The researchers are particularly interested in evaluating the effectiveness of MT at each individual clinic, rather than relying solely on the overall effect across all clinics.

Simulating Example Data

We begin by simulating data under the null hypothesis that MT does not affect mental health, and that there are no differences between clinics or between each clinic and the overall mean effect.

intercept <-  10 # Grand mean
MTeffect <- 0 # Overall, fixed effect
sd_MT <-  0 # (No) variability in cluster-specific coefficients
sigma <-  8 # within-cluster variability
sd_int <-  7 # intercepts variability
cor <-  0 # No association between slopes and intercepts
n <-  20 # sample size in each cluster
n_clusters <- 50 # number of clusters / clinics
N <- n*n_clusters # total sample size
clinic <- rep(1:n_clusters, each = n) # Defining cluster ID
MT <- rep(c(0, 1), length.out = N) # Setting dichotomous predictor: control vs. treat
varmat <- matrix(c(sd_int^2, cor, cor, sd_MT^2), 2, 2) # Variance-covariance matrix

set.seed(311)
re <- mvtnorm::rmvnorm(n_clusters, sigma = varmat) # Generate cluster-level population information from var-covar
colnames(re) <- c('Intercept', 'MTeffect') # renaming the the columns

# Generate outcome variable:
ment_health <- (intercept + re[clinic, 'Intercept'])   +
            (MTeffect  + re[clinic, 'MTeffect'])*MT +  # this line is redundant in the Type I error conditions because it's all 0 in the null condition
                 rnorm(N, sd = sigma)
  
# putting it all together
dat <- data.frame(ment_health, MT = factor(MT), clinic)
head(dat)

Visualizing the Data

By Treatment

dp <- ggplot(dat, aes(x=MT, y=ment_health, fill=MT)) + 
  geom_violin(trim=FALSE)+
  geom_boxplot(width=0.1, fill="white")+
  labs(title="",x="control vs. MT", y = "Mental Health")
dp + scale_fill_brewer(palette="Set2") + theme_minimal()

## By Clinic

dat %>% 
  ggplot(aes(x = MT, y = ment_health, color = factor(clinic), group = factor(clinic)) )+ 
  geom_point() +  # individual trajectory
  geom_smooth(method = "lm", se = F, alpha = 0.8)+
  geom_smooth(aes(group = 1), method = "lm", linewidth=2, linetype='dashed', color='black') +  # overall trend line 
  theme_minimal()

One way we might approach this is by conducting 50 one-sample t-tests comparing the treatment and control groups, as seen in the next section.

The Traditional Approach: Multiple Comparisons

Without Multiplicity Correction

# Defining a function for convenience 
t_test_func <- function(subset_data) {
  result <- t.test(ment_health ~ MT, data = subset_data)
  c(p.value = result$p.value,
    mean_diff = result$estimate[2] - result$estimate[1],
    up_ci = -result$conf.int[1],
    low_ci = -result$conf.int[2])
}

# Use 'by' to split the data by clinic and apply the function
results <- by(dat, dat$clinic, t_test_func)

# Convert results to a data frame
results_df <- data.frame(do.call(rbind, results))

# Extract values from the data frame
p.vals <- results_df$p.value
mean_difs <- results_df$mean_diff
lowcis <- results_df$low_ci
uppcis <- results_df$up_ci

Without multiple comparison adjustments, 2 out of the 50 t tests were incorrectly rejected.

# Counting how many are significant
(t.test_res<-sum(p.vals<=.05))

## [1] 2

# Type I error rate
(t.test_res/n_clusters)

## [1] 0.04

We can plot this:

no_pooling <- data.frame(clinic = seq(1, n_clusters), mean_difs, lowcis, uppcis) %>%
  mutate(significance = ifelse(lowcis <= 0 & uppcis >= 0, "Not sig", "Sig")) %>%
  as_tibble()

ggplot(no_pooling, aes(x = clinic, y = mean_difs)) +
  geom_point() +
  geom_errorbar(aes(ymin = lowcis, ymax = uppcis, color = significance), width = 0.2) +
  labs(x = "Site", y = "Treatment Slope") +
  ggtitle("Point Estimates from t tests with 95% Confidence Intervals") +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  scale_color_manual(values = c("black", "red"))

With Multiplicity Correction

# Bonferroni adjustment
Bonferroni_ps <- p.adjust(p.vals, method = "bonferroni", 
         n = length(p.vals))

sum(Bonferroni_ps<=.05) # none are significant

## [1] 0

# FDR adjustment
FDR_ps <-p.adjust(p.vals, method = "fdr", 
         n = length(p.vals))

sum(FDR_ps<=.05) # none are significant

## [1] 0

Using Multilevel Modeling

mod <- glmmTMB::glmmTMB(ment_health~MT+(1+MT||clinic), data=dat)
  randeffs <- mixedup::extract_random_coefs(mod) %>% filter(effect=="MT1")
  randCOEF <- randeffs$value
  randCIs <- data.frame(randeffs$lower_2.5, randeffs$upper_97.5)
  

  MLM_CI_prod <- apply(randCIs, 1, function(row)  row[1] * row[2])
  (MLM_prop_sig <- sum(MLM_CI_prod > 0)/n_clusters) # none are significant

## [1] 0

part_pooling <- data.frame(clinic = seq(1, n_clusters), randCOEF, lci = randeffs$lower_2.5, uci = randeffs$upper_97.5) %>%
  mutate(significance = ifelse(lci <= 0 & uci >= 0, "Not Sig", "Sig")) %>%
  as_tibble()

ggplot(part_pooling, aes(x = clinic, y = randCOEF)) +
  geom_point() +
  geom_errorbar(aes(ymin = lci, ymax = uci, color = significance), width = 0.2) +
  labs(x = "Site", y = "Treatment Slope") +
  ggtitle("Point Estimates from Freq. MLM with 95% Confidence Intervals") +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  scale_color_manual(values = c("black", "red"))

The plot above shows that the estimated effects of individual clinics are pulled closer to zero, resulting in less biased estimates with greater precision (narrower confidence intervals), and no clinic was mistakenly identified as significantly different from zero. Still, this is just a single simulated dataset. Next, I present results from a preliminary simulation study.

Results From a Pilot Simulation

The following is a preliminary, pilot Monte Carlo simulation with 1000 iterations using SimDesign.

Type I Error Conditions

res.error <- read_csv("/Users/udialter/Library/CloudStorage/GoogleDrive-udi.alter@gmail.com/.shortcut-targets-by-id/1YTLxwhL6sZPwDksLKy12wSf9kHwKL4EQ/Cribbie_Alter/Dissertation (previously Bayes_Multiplicity)/Code/Data/sim_results_TypeIerror_conditions1.csv")

long.error <- res.error %>%
  dplyr::select(n:n_clusters, contains("prop")) %>%
  pivot_longer(cols=contains("prop"), names_to = 'Method', values_to = 'TypeIerror')



long.error $Method <- factor(long.error $Method)

ggplot(long.error, aes(x = n, y = TypeIerror,
                 colour = Method,  
                 linetype = as.factor(n_clusters) 
)) +
  geom_line(linewidth = 2, alpha = 0.7) +
  theme_minimal() +
  theme(
    text = element_text(size = 15),  # Increase the size of all text
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 15, face = "bold"),
  
    axis.text = element_text(size = 15),
    axis.title = element_text(size = 15),
    strip.text = element_text(size = 15),  # Facet label text size
  ) +
  labs(title = "Type I error conditions",y="Type I Error Rate",
    color = "Approach", linetype = "Number of clinics", x = "n per-clinic") +
  scale_linetype_manual(values = c("5" = "dotted", "10" = "dotdash", "20"="dashed","50" = "longdash", "100" = "solid"))

Power Conditions

res.power <- read.csv("~/Library/CloudStorage/GoogleDrive-udi.alter@gmail.com/.shortcut-targets-by-id/1YTLxwhL6sZPwDksLKy12wSf9kHwKL4EQ/Cribbie_Alter/Dissertation (previously Bayes_Multiplicity)/Code/Data/sim_results_power_conditions.csv")

long.power <- res.power %>%
  dplyr::select(n:MTeffect, contains("prop")) %>%
  pivot_longer(cols=contains("prop"), names_to = 'Method', values_to = 'PowerRate')



long.power$Method <- factor(long.power$Method)
long.power$MTeffect <- factor(long.power$MTeffect)


# Custom labeling function to create expressions
custom_label <- function(MTeffect) {
  sapply(MTeffect, function(x) as.expression(bquote(beta[MT] == .(x))))
}

# Plot
ggplot(long.power, aes(x = n, y = PowerRate,
                 colour = Method,  
                 linetype = as.factor(n_clusters) 
)) +
  facet_wrap(~MTeffect, labeller = as_labeller(custom_label, label_parsed)) +
  geom_line(linewidth = 3, alpha = 0.7) +
  theme_minimal() +
  theme(
    text = element_text(size = 30),  # Increase the size of all text
    legend.text = element_text(size = 30),
    legend.title = element_text(size = 30, face = "bold"),
    axis.text = element_text(size = 30),
    axis.title = element_text(size = 30),
    strip.text = element_text(size = 30),  # Facet label text size
  ) +
  labs(title = "Power conditions", y="Power Rate",
       color = "Approach", linetype = "Number of clinics", x = "n per-clinic") +
  scale_linetype_manual(values = c("5" = "dotted", "10" = "dashed",
                                   "50" = "longdash", "100" = "solid")) +
  theme(strip.text = element_text(size = 30, face = "bold"))  # Keep the facet labels bold

Shrinking Concerns Over Multiple Testing with Multilevel Modeling

An Illustrative Example

Udi Alter

2024-12-09