I came across this blog post by Prof. Harrell Ordinal Models for Paired Data – Statistical Thinking.
I was trying to replicate the simulation in order to estimate the sample size for my paired study. Eveyrthing works perfectly and I am getting a reasonable sample size estimation. I just want to make sure that I am not missing any important details or doing anything wrong.
Here is my code
# Key metrics for the simulation (dummy data)
before <- c(1.2,2.3,3.5,2.1,4.2,2.2,4.2,5.4,2.5,5.6,5.7,2.8,5.3,2.2,1.2)
after <- c(4.2,2.3,3.5,2.1,2.2,2.2,5.2,5.4,2.5,5.1,5.4,2.2,5.3,2.2,4.2)
delta_obs <- mean(before) - mean(after)
sd_obs <- sd(before)
rho_obs <- 0.5 # used the same rho used by prof. Harrell
get_paired_power <- function(n, reps = 500) {
p_values <- replicate(reps, {
y1 <- rnorm(n, mean = 0, sd = sd_obs)
y2 <- rnorm(n, mean = delta_obs + rho_obs * y1, sd = sqrt(1 - rho_obs^2) * sd_obs)
y_sim <- c(y1, y2)
x_sim <- c(rep(0, n), rep(1, n))
id_sim <- factor(c(1:n, 1:n))
f <- orm(y_sim ~ x_sim, x=TRUE, y=TRUE)
g <- robcov(f, id_sim)
anova(g)['x_sim', 'P']
})
mean(p_values < 0.05, na.rm = TRUE)
}
targets <- c(0.80) # here I am using 0.8 only
results <- data.frame(Power_Target = targets, Required_Pairs = NA)
current_n <- 140
for (i in 1:length(targets)) {
while (get_paired_power(current_n) < targets[i]) {
current_n <- current_n + 10 # Incrementing pairs
print(current_n)
}
results$Required_Pairs[i] <- current_n
}
print(results)