In the end I ended up going with my orthonormal basis approach. In case anyone else wants to try it, it wasn’t very much code. (Hope it’s ok to paste here. NB I am new to R so am sure that can be made tidier, especially at the end!)
library(Hmisc)
library(pracma)
library(ggplot2)
xs <- 1:36
# Construct the orthonormal basis
ones <- rep(1, length(xs))
unnormalised_basis <- cbind(ones, rcspline.eval(xs, nk=4, inclx=TRUE))
basis <- gramSchmidt(unnormalised_basis)$Q
# Our model is
# Y = Σₘ(Eₘ Hₘ) + (1 - λΣₘHₘ)(βF)
# where F ~ N(0, 1)
# and the Hs are modelled as sums of random step functions
random_hours <- function() {
breakpoints <- sort(runif(3, min=9, max=42))
stepfun(breakpoints, c(0, 50, 100, 150))(xs)
}
# Simulate hours/month and noise, and use regression to estimate E
estimate_E <- function(E, lambda, beta, noise_sigma, N) {
# Create a df with one row for each child
hours <- t(replicate(N, random_hours()))
df <- data.frame(
Hproj = hours %*% basis,
F = rnorm(N, 0, 1),
Y = hours %*% E +
(1 - lambda * rowSums(hours)) * beta * F +
rnorm(N, 0, noise_sigma)
)
coeffs <- lm(Y ~ Hproj.1 + Hproj.2 + Hproj.3 + Hproj.4 + F, data=df)$coefficients
basis %*% c(coeffs['Hproj.1'], coeffs['Hproj.2'], coeffs['Hproj.3'], coeffs['Hproj.4'])
}
HOURS_PER_MONTH <- 24 * 7 * 31 # Upper bound
E <- basis %*% c(-8, 11, -3, 0) # True spline
lambda <- 200.0/HOURS_PER_MONTH
beta <- 200
noise_sigma <- 300
N <- 1000
estimated_Es <- replicate(5, estimate_E(E, lambda, beta, noise_sigma, N)[,1])
ggplot(mapping=aes(x = xs)) +
ylim(-10, 10) +
xlab("x") +
ylab("y") +
geom_line(aes(y = E, color="True Eₘ")) +
geom_line(aes(y = estimated_Es[,1], color="Estimated Eₘ (1)"), linetype='dashed') +
geom_line(aes(y = estimated_Es[,2], color="Estimated Eₘ (2)"), linetype='dashed') +
geom_line(aes(y = estimated_Es[,3], color="Estimated Eₘ (3)"), linetype='dashed') +
geom_line(aes(y = estimated_Es[,4], color="Estimated Eₘ (4)"), linetype='dashed') +
geom_line(aes(y = estimated_Es[,5], color="Estimated Eₘ (5)"), linetype='dashed')```