Here, I rerun the models documented in Arslan et al. (2023). Not within spitting distance: Salivary immunoassays of estradiol have subpar validity for predicting cycle phase. The changes are: - switched models to be multivariate with residual correlations between outcomes. - dropped one outcome (fertile_lh) which was a side interest and led to many rows being dropped because of missingness - added an outcome (log LH, luteinising hormone). It wasn’t part of the original set and since I needed it for a different project, I added it here.
The imputed values changed only very slightly. I’ve documented the mistake of not using mutually adjusted multivariate predictors here.
Warning: package 'dplyr' was built under R version 4.5.2
── Attaching core tidyverse packages ────────────── tidyverse 2.0.0 ──
✔ dplyr 1.2.1 ✔ readr 2.1.5
✔ forcats 1.0.1 ✔ stringr 1.5.2
✔ ggplot2 4.0.0 ✔ tibble 3.3.0
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.1.0
── Conflicts ──────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Loading required package: Rcpp
Loading 'brms' package (version 2.23.0). Useful instructions
can be found by typing help('brms'). A more detailed introduction
to the package is available through vignette('brms_overview').
Attaching package: 'brms'
The following object is masked from 'package:stats':
ar
library(rio)
plot_curve <- function(model, method = "fitted", resp = NULL) {
cond <- tibble(.rows = 1)
cond[[names(model$data)[2]]] <- "none"
if (!is.null(resp)) {
plot(conditional_effects(model, method = method,
conditions = cond,
spaghetti = TRUE, nsamples = 100, resp = resp),
points = TRUE,
point_args = list(alpha = 0.05),
spaghetti_args = list(alpha = 0.01, colour = '#FF000007'),
line_args = list(size = 0),
plot = FALSE)[[1]]
} else {
plot(conditional_effects(model, method = method,
conditions = cond,
spaghetti = TRUE, nsamples = 100),
points = TRUE,
point_args = list(alpha = 0.05),
spaghetti_args = list(alpha = 0.01, colour = '#FF000007'),
line_args = list(size = 0),
plot = FALSE)[[1]]
}
}
rmse <- function(error) { sqrt(mean(error^2, na.rm = TRUE)) }
rmse_brms <- function(model) {
resid <- residuals(model, summary = F)
rmse_samples <- apply(resid, 1, rmse)
rmse_ci <- rstantools::posterior_interval(as.matrix(rmse_samples), prob = 0.95)
sprintf("%.2f [%.2f;%.2f]", mean(rmse_samples), rmse_ci[1], rmse_ci[2])
}
mae <- function(error) { mean(abs(error), na.rm = TRUE) }
mae_brms <- function(model) {
resid <- residuals(model, summary = F)
mae_samples <- apply(resid, 1, mae)
mae_ci <- rstantools::posterior_interval(as.matrix(mae_samples), prob = 0.95)
sprintf("%.2f [%.2f;%.2f]", mean(mae_samples), mae_ci[1], mae_ci[2])
}
loo_ci <- function(model) {
loo_R <- loo_R2(model, re_formula = NA)
loo_R <- sqrt(loo_R)
sprintf("%.2f [%.2f;%.2f]", loo_R[,"Estimate"], loo_R[,"Q2.5"], loo_R[,"Q97.5"])
}
biocycle <- readRDS("biocycle.rds")
options(mc.cores = parallel::detectCores(), brms.backend = "cmdstanr", brms.file_refit = "on_change")
rstan::rstan_options(auto_write = TRUE)
biocycle <- biocycle %>% filter(between(cycle_length, 20, 35))
biocycle$logE_P <- log(biocycle$E_P)
biocycle$logE <- log(biocycle$total_estradiol)
biocycle$logP <- log(biocycle$progesterone)
biocycle$logT <- log(biocycle$Testo)
biocycle$logFE <- log(biocycle$estradiol)
biocycle$logFT <- log(biocycle$`Free T (ng/dL)`)
biocycle$logLH <- log(biocycle$LH)
biocycle$fertile_lh <- NULL
bc_days <- data.frame(
bc_day = c(-28:-1, -29:-40),
prc_stirn_bc = c(.01, .01, .02, .03, .05, .09, .16, .27, .38, .48, .56, .58, .55, .48, .38, .28, .20, .14, .10, .07, .06, .04, .03, .02, .01, .01, .01, .01, rep(.01, times = 12)),
# rep(.01, times = 70)), # gangestad uses .01 here, but I think such cases are better thrown than kept, since we might simply have missed a mens
prc_wcx_bc = c(.000, .000, .001, .002, .004, .009, .018, .032, .050, .069, .085, .094, .093, .085, .073, .059, .047, .036, .028, .021, .016, .013, .010, .008, .007, .006, .005, .005, rep(.005, times = 12))
) %>% arrange(bc_day)
fc_days <- data.frame(
fc_day = c(0:39),
prc_stirn_fc = c(.01, .01, .02, .03, .05, .09, .16, .27, .38, .48, .56, .58, .55, .48, .38, .28, .20, .14, .10, .07, .06, .04, .03, .02, .01, .01, .01, .01, rep(.01, times = 12)),
# rep(.01, times = 70)), # gangestad uses .01 here, but I think such cases are better thrown than kept, since we might simply have missed a mens
prc_wcx_fc = c(.000, .000, .001, .002, .004, .009, .018, .032, .050, .069, .085, .094, .093, .085, .073, .059, .047, .036, .028, .021, .016, .013, .010, .008, .007, .006, .005, .005, rep(.005, times = 12))
)
lh_days <- tibble(
conception_risk_lh = c(rep(0,8), 0.00, 0.01, 0.02, 0.06, 0.16, 0.20, 0.25, 0.24, 0.10, 0.02, 0.02, rep(0,12)),
lh_day = -15:15
) %>%
mutate(fertile_lh = conception_risk_lh/max(conception_risk_lh)*0.25/0.31)
biocycle <- biocycle %>% left_join(lh_days %>% select(lh_day, fertile_lh), by = "lh_day")
bf_e_bc <- bf(logE ~ s(bc_day) + (1 | id) + (1|id:cycle))
bf_fe_bc <- bf(logFE ~ s(bc_day) + (1 | id) + (1|id:cycle))
bf_p_bc <- bf(logP ~ s(bc_day) + (1 | id) + (1|id:cycle))
bf_t_bc <- bf(logT ~ s(bc_day) + (1 | id) + (1|id:cycle))
bf_ft_bc <- bf(logFT ~ s(bc_day) + (1 | id) + (1|id:cycle))
bf_lh_bc <- bf(logLH ~ s(bc_day) + (1 | id) + (1|id:cycle))
mod_mv_bc <- brm(
bf_e_bc + bf_fe_bc + bf_p_bc + bf_t_bc + bf_ft_bc + bf_lh_bc + set_rescor(TRUE),
data = biocycle,
control = list(adapt_delta = 0.95, max_treedepth = 20),
file = 'models/mod_mv_bc'
)
Warning: Rows containing NAs were excluded from the model.
mod_mv_bc
Family: MV(gaussian, gaussian, gaussian, gaussian, gaussian, gaussian)
Links: mu = identity
mu = identity
mu = identity
mu = identity
mu = identity
mu = identity
Formula: logE ~ s(bc_day) + (1 | id) + (1 | id:cycle)
logFE ~ s(bc_day) + (1 | id) + (1 | id:cycle)
logP ~ s(bc_day) + (1 | id) + (1 | id:cycle)
logT ~ s(bc_day) + (1 | id) + (1 | id:cycle)
logFT ~ s(bc_day) + (1 | id) + (1 | id:cycle)
logLH ~ s(bc_day) + (1 | id) + (1 | id:cycle)
Data: biocycle (Number of observations: 3394)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Smoothing Spline Hyperparameters:
Estimate Est.Error l-95% CI u-95% CI Rhat
sds(logE_sbc_day_1) 5.05 1.28 3.20 8.11 1.00
sds(logFE_sbc_day_1) 5.44 1.33 3.51 8.55 1.01
sds(logP_sbc_day_1) 5.80 1.47 3.78 9.53 1.00
sds(logT_sbc_day_1) 1.03 0.34 0.59 1.89 1.00
sds(logFT_sbc_day_1) 1.10 0.36 0.60 1.97 1.00
sds(logLH_sbc_day_1) 5.90 1.48 3.74 9.47 1.00
Bulk_ESS Tail_ESS
sds(logE_sbc_day_1) 1013 1307
sds(logFE_sbc_day_1) 798 1374
sds(logP_sbc_day_1) 901 1286
sds(logT_sbc_day_1) 754 1424
sds(logFT_sbc_day_1) 1044 1693
sds(logLH_sbc_day_1) 930 1561
Multilevel Hyperparameters:
~id (Number of levels: 246)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(logE_Intercept) 0.21 0.01 0.18 0.23 1.00
sd(logFE_Intercept) 0.20 0.02 0.17 0.23 1.00
sd(logP_Intercept) 0.42 0.03 0.37 0.47 1.00
sd(logT_Intercept) 0.34 0.02 0.31 0.38 1.01
sd(logFT_Intercept) 0.39 0.02 0.36 0.43 1.03
sd(logLH_Intercept) 0.29 0.02 0.25 0.33 1.01
Bulk_ESS Tail_ESS
sd(logE_Intercept) 904 1168
sd(logFE_Intercept) 825 1023
sd(logP_Intercept) 1169 1888
sd(logT_Intercept) 292 649
sd(logFT_Intercept) 165 270
sd(logLH_Intercept) 937 2557
~id:cycle (Number of levels: 448)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(logE_Intercept) 0.01 0.00 0.00 0.02 1.01
sd(logFE_Intercept) 0.01 0.01 0.00 0.02 1.03
sd(logP_Intercept) 0.16 0.03 0.11 0.21 1.01
sd(logT_Intercept) 0.01 0.01 0.00 0.02 1.01
sd(logFT_Intercept) 0.03 0.00 0.02 0.04 1.02
sd(logLH_Intercept) 0.12 0.03 0.05 0.17 1.02
Bulk_ESS Tail_ESS
sd(logE_Intercept) 185 429
sd(logFE_Intercept) 136 433
sd(logP_Intercept) 688 1041
sd(logT_Intercept) 221 698
sd(logFT_Intercept) 303 636
sd(logLH_Intercept) 468 573
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
logE_Intercept 4.46 0.02 4.43 4.49 1.01 1126
logFE_Intercept 0.43 0.02 0.40 0.47 1.00 1603
logP_Intercept 7.30 0.03 7.24 7.36 1.01 977
logT_Intercept 3.31 0.02 3.26 3.35 1.02 145
logFT_Intercept -0.88 0.03 -0.93 -0.83 1.02 166
logLH_Intercept 1.83 0.02 1.79 1.88 1.00 1509
logE_sbc_day_1 -11.70 1.56 -14.82 -8.63 1.00 908
logFE_sbc_day_1 -12.73 1.76 -16.24 -9.33 1.00 920
logP_sbc_day_1 -19.59 2.25 -24.08 -15.34 1.00 2294
logT_sbc_day_1 -0.95 0.51 -1.96 0.03 1.00 1117
logFT_sbc_day_1 -0.84 0.56 -1.92 0.21 1.00 1131
logLH_sbc_day_1 3.74 2.15 -0.34 7.95 1.00 1565
Tail_ESS
logE_Intercept 1898
logFE_Intercept 2357
logP_Intercept 1699
logT_Intercept 309
logFT_Intercept 293
logLH_Intercept 2376
logE_sbc_day_1 1671
logFE_sbc_day_1 1430
logP_sbc_day_1 2651
logT_sbc_day_1 2162
logFT_sbc_day_1 2079
logLH_sbc_day_1 2497
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sigma_logE 0.50 0.01 0.49 0.51 1.00 1691
sigma_logFE 0.56 0.01 0.55 0.58 1.00 1716
sigma_logP 0.64 0.01 0.62 0.65 1.00 4315
sigma_logT 0.18 0.00 0.17 0.18 1.00 2678
sigma_logFT 0.20 0.00 0.19 0.20 1.00 1734
sigma_logLH 0.65 0.01 0.63 0.66 1.00 4606
Tail_ESS
sigma_logE 2336
sigma_logFE 2409
sigma_logP 2912
sigma_logT 2504
sigma_logFT 2784
sigma_logLH 2765
Residual Correlations:
Estimate Est.Error l-95% CI u-95% CI Rhat
rescor(logE,logFE) 0.99 0.00 0.99 0.99 1.01
rescor(logE,logP) -0.13 0.02 -0.16 -0.09 1.00
rescor(logFE,logP) -0.14 0.02 -0.18 -0.11 1.00
rescor(logE,logT) 0.24 0.02 0.21 0.28 1.00
rescor(logFE,logT) 0.23 0.02 0.19 0.26 1.00
rescor(logP,logT) -0.03 0.02 -0.06 0.01 1.00
rescor(logE,logFT) 0.25 0.02 0.22 0.29 1.00
rescor(logFE,logFT) 0.30 0.02 0.26 0.33 1.00
rescor(logP,logFT) -0.09 0.02 -0.13 -0.06 1.00
rescor(logT,logFT) 0.85 0.01 0.84 0.86 1.00
rescor(logE,logLH) 0.35 0.02 0.32 0.38 1.00
rescor(logFE,logLH) 0.35 0.02 0.32 0.39 1.00
rescor(logP,logLH) -0.15 0.02 -0.18 -0.12 1.00
rescor(logT,logLH) 0.33 0.02 0.29 0.36 1.00
rescor(logFT,logLH) 0.33 0.02 0.30 0.36 1.00
Bulk_ESS Tail_ESS
rescor(logE,logFE) 699 1602
rescor(logE,logP) 7153 3481
rescor(logFE,logP) 7223 3393
rescor(logE,logT) 2872 2924
rescor(logFE,logT) 2784 3042
rescor(logP,logT) 3699 2826
rescor(logE,logFT) 2730 2989
rescor(logFE,logFT) 2831 3015
rescor(logP,logFT) 3737 3035
rescor(logT,logFT) 1431 2459
rescor(logE,logLH) 3565 3019
rescor(logFE,logLH) 3495 2921
rescor(logP,logLH) 5476 3126
rescor(logT,logLH) 4368 3190
rescor(logFT,logLH) 4660 3217
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
# Predict
preds_bc <- predict(mod_mv_bc, newdata = bc_days, re_formula = NA)
# Assign to bc_days using named list elements for safety
bc_days$mv_est_estradiol_bc <- round(preds_bc[, "Estimate", "logE"], 2)
bc_days$mv_est_free_estradiol_bc <- round(preds_bc[, "Estimate", "logFE"], 2)
bc_days$mv_est_progesterone_bc <- round(preds_bc[, "Estimate", "logP"], 2)
bc_days$mv_est_testosterone_bc <- round(preds_bc[, "Estimate", "logT"], 2)
bc_days$mv_est_free_testosterone_bc <- round(preds_bc[, "Estimate", "logFT"], 2)
bc_days$mv_est_luteinising_bc <- round(preds_bc[, "Estimate", "logLH"], 2)
rio::export(bc_days ,"merge_files/bc_days_mv.rds")
rio::export(bc_days ,"merge_files/bc_days_mv.tsv")
rio::export(bc_days ,"merge_files/bc_days_mv.sav")
bf_e_fc <- bf(logE ~ s(fc_day) + (1 | id) + (1|id:cycle))
bf_fe_fc <- bf(logFE ~ s(fc_day) + (1 | id) + (1|id:cycle))
bf_p_fc <- bf(logP ~ s(fc_day) + (1 | id) + (1|id:cycle))
bf_t_fc <- bf(logT ~ s(fc_day) + (1 | id) + (1|id:cycle))
bf_ft_fc <- bf(logFT ~ s(fc_day) + (1 | id) + (1|id:cycle))
bf_lh_fc <- bf(logLH ~ s(fc_day) + (1 | id) + (1|id:cycle))
mod_mv_fc <- brm(
bf_e_fc + bf_fe_fc + bf_p_fc + bf_t_fc + bf_ft_fc + bf_lh_fc + set_rescor(TRUE),
data = biocycle,
control = list(adapt_delta = 0.95, max_treedepth = 20),
file = 'models/mod_mv_fc'
)
Warning: Rows containing NAs were excluded from the model.
mod_mv_fc
Family: MV(gaussian, gaussian, gaussian, gaussian, gaussian, gaussian)
Links: mu = identity
mu = identity
mu = identity
mu = identity
mu = identity
mu = identity
Formula: logE ~ s(fc_day) + (1 | id) + (1 | id:cycle)
logFE ~ s(fc_day) + (1 | id) + (1 | id:cycle)
logP ~ s(fc_day) + (1 | id) + (1 | id:cycle)
logT ~ s(fc_day) + (1 | id) + (1 | id:cycle)
logFT ~ s(fc_day) + (1 | id) + (1 | id:cycle)
logLH ~ s(fc_day) + (1 | id) + (1 | id:cycle)
Data: biocycle (Number of observations: 3394)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Smoothing Spline Hyperparameters:
Estimate Est.Error l-95% CI u-95% CI Rhat
sds(logE_sfc_day_1) 1.82 0.57 1.03 3.21 1.00
sds(logFE_sfc_day_1) 2.03 0.62 1.14 3.51 1.00
sds(logP_sfc_day_1) 3.84 1.10 2.35 6.47 1.00
sds(logT_sfc_day_1) 0.60 0.20 0.33 1.09 1.00
sds(logFT_sfc_day_1) 0.71 0.24 0.39 1.28 1.00
sds(logLH_sfc_day_1) 3.22 0.91 1.94 5.42 1.00
Bulk_ESS Tail_ESS
sds(logE_sfc_day_1) 1648 2089
sds(logFE_sfc_day_1) 1817 2336
sds(logP_sfc_day_1) 1418 2690
sds(logT_sfc_day_1) 1730 2478
sds(logFT_sfc_day_1) 1741 2648
sds(logLH_sfc_day_1) 1636 2430
Multilevel Hyperparameters:
~id (Number of levels: 246)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(logE_Intercept) 0.16 0.01 0.14 0.19 1.01
sd(logFE_Intercept) 0.15 0.01 0.12 0.18 1.01
sd(logP_Intercept) 0.38 0.03 0.32 0.44 1.00
sd(logT_Intercept) 0.34 0.02 0.31 0.38 1.01
sd(logFT_Intercept) 0.39 0.02 0.36 0.43 1.01
sd(logLH_Intercept) 0.30 0.02 0.26 0.35 1.00
Bulk_ESS Tail_ESS
sd(logE_Intercept) 641 1521
sd(logFE_Intercept) 486 1572
sd(logP_Intercept) 1074 2253
sd(logT_Intercept) 327 680
sd(logFT_Intercept) 354 558
sd(logLH_Intercept) 1440 2111
~id:cycle (Number of levels: 448)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(logE_Intercept) 0.01 0.00 0.00 0.02 1.01
sd(logFE_Intercept) 0.01 0.01 0.00 0.02 1.01
sd(logP_Intercept) 0.20 0.03 0.13 0.26 1.01
sd(logT_Intercept) 0.01 0.00 0.00 0.02 1.00
sd(logFT_Intercept) 0.03 0.00 0.02 0.04 1.01
sd(logLH_Intercept) 0.11 0.03 0.03 0.16 1.01
Bulk_ESS Tail_ESS
sd(logE_Intercept) 274 537
sd(logFE_Intercept) 217 616
sd(logP_Intercept) 619 994
sd(logT_Intercept) 500 980
sd(logFT_Intercept) 397 923
sd(logLH_Intercept) 407 424
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
logE_Intercept 4.46 0.01 4.43 4.49 1.00 2211
logFE_Intercept 0.44 0.01 0.41 0.46 1.00 3284
logP_Intercept 7.30 0.03 7.24 7.36 1.00 1734
logT_Intercept 3.31 0.02 3.27 3.36 1.01 167
logFT_Intercept -0.88 0.02 -0.93 -0.83 1.01 187
logLH_Intercept 1.83 0.02 1.79 1.88 1.00 2073
logE_sfc_day_1 -0.72 1.44 -3.45 2.20 1.00 1078
logFE_sfc_day_1 -0.81 1.61 -3.91 2.43 1.00 1078
logP_sfc_day_1 -10.49 2.78 -15.86 -5.23 1.00 2593
logT_sfc_day_1 0.35 0.50 -0.59 1.33 1.00 1786
logFT_sfc_day_1 0.50 0.55 -0.60 1.59 1.00 1806
logLH_sfc_day_1 1.59 2.24 -2.76 6.09 1.00 2341
Tail_ESS
logE_Intercept 2543
logFE_Intercept 3057
logP_Intercept 1851
logT_Intercept 397
logFT_Intercept 388
logLH_Intercept 2578
logE_sfc_day_1 1587
logFE_sfc_day_1 1738
logP_sfc_day_1 3263
logT_sfc_day_1 2476
logFT_sfc_day_1 2779
logLH_sfc_day_1 2446
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sigma_logE 0.58 0.01 0.57 0.60 1.00 1481
sigma_logFE 0.66 0.01 0.64 0.67 1.00 1471
sigma_logP 0.81 0.01 0.79 0.83 1.00 5363
sigma_logT 0.18 0.00 0.18 0.19 1.00 3504
sigma_logFT 0.20 0.00 0.20 0.21 1.00 2900
sigma_logLH 0.70 0.01 0.68 0.71 1.00 4020
Tail_ESS
sigma_logE 2196
sigma_logFE 2637
sigma_logP 3016
sigma_logT 2737
sigma_logFT 2922
sigma_logLH 2934
Residual Correlations:
Estimate Est.Error l-95% CI u-95% CI Rhat
rescor(logE,logFE) 0.99 0.00 0.99 0.99 1.00
rescor(logE,logP) -0.02 0.02 -0.05 0.02 1.00
rescor(logFE,logP) -0.04 0.02 -0.07 -0.00 1.00
rescor(logE,logT) 0.26 0.02 0.23 0.30 1.00
rescor(logFE,logT) 0.25 0.02 0.21 0.28 1.00
rescor(logP,logT) -0.03 0.02 -0.07 0.00 1.00
rescor(logE,logFT) 0.27 0.02 0.24 0.31 1.00
rescor(logFE,logFT) 0.31 0.02 0.28 0.34 1.00
rescor(logP,logFT) -0.12 0.02 -0.16 -0.09 1.00
rescor(logT,logFT) 0.86 0.01 0.85 0.87 1.00
rescor(logE,logLH) 0.36 0.02 0.33 0.39 1.00
rescor(logFE,logLH) 0.37 0.02 0.34 0.40 1.00
rescor(logP,logLH) -0.17 0.02 -0.21 -0.14 1.00
rescor(logT,logLH) 0.39 0.02 0.36 0.42 1.00
rescor(logFT,logLH) 0.39 0.02 0.36 0.42 1.00
Bulk_ESS Tail_ESS
rescor(logE,logFE) 1025 1817
rescor(logE,logP) 7752 3024
rescor(logFE,logP) 7813 3029
rescor(logE,logT) 3810 2963
rescor(logFE,logT) 3771 3114
rescor(logP,logT) 3677 3120
rescor(logE,logFT) 3562 3055
rescor(logFE,logFT) 3789 3265
rescor(logP,logFT) 4093 2973
rescor(logT,logFT) 1439 2328
rescor(logE,logLH) 3658 3174
rescor(logFE,logLH) 3718 3154
rescor(logP,logLH) 6344 2936
rescor(logT,logLH) 4363 2922
rescor(logFT,logLH) 4040 2807
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
preds_fc <- predict(mod_mv_fc, newdata = fc_days, re_formula = NA)
fc_days$mv_est_estradiol_fc <- round(preds_fc[, "Estimate", "logE"], 2)
fc_days$mv_est_free_estradiol_fc <- round(preds_fc[, "Estimate", "logFE"], 2)
fc_days$mv_est_progesterone_fc <- round(preds_fc[, "Estimate", "logP"], 2)
fc_days$mv_est_testosterone_fc <- round(preds_fc[, "Estimate", "logT"], 2)
fc_days$mv_est_free_testosterone_fc <- round(preds_fc[, "Estimate", "logFT"], 2)
fc_days$mv_est_luteinising_fc <- round(preds_fc[, "Estimate", "logLH"], 2)
rio::export(fc_days ,"merge_files/fc_days_mv.rds")
rio::export(fc_days ,"merge_files/fc_days_mv.tsv")
rio::export(fc_days ,"merge_files/fc_days_mv.sav")
# Filter data
biocycle_lh <- biocycle %>% filter(between(lh_day, -20, 14))
bf_e_lh <- bf(logE ~ s(lh_day) + (1 | id) + (1|id:cycle))
bf_fe_lh <- bf(logFE ~ s(lh_day) + (1 | id) + (1|id:cycle))
bf_p_lh <- bf(logP ~ s(lh_day) + (1 | id) + (1|id:cycle))
bf_t_lh <- bf(logT ~ s(lh_day) + (1 | id) + (1|id:cycle))
bf_ft_lh <- bf(logFT ~ s(lh_day) + (1 | id) + (1|id:cycle))
bf_lh_lh <- bf(logLH ~ s(lh_day) + (1 | id) + (1|id:cycle))
# PBFW not modeled in LH model
mod_mv_lh <- brm(
bf_e_lh + bf_fe_lh + bf_p_lh + bf_t_lh + bf_ft_lh + bf_lh_lh + set_rescor(TRUE),
data = biocycle_lh,
control = list(adapt_delta = 0.95, max_treedepth = 20),
file = 'models/mod_mv_lh'
)
Warning: Rows containing NAs were excluded from the model.
mod_mv_lh
Family: MV(gaussian, gaussian, gaussian, gaussian, gaussian, gaussian)
Links: mu = identity
mu = identity
mu = identity
mu = identity
mu = identity
mu = identity
Formula: logE ~ s(lh_day) + (1 | id) + (1 | id:cycle)
logFE ~ s(lh_day) + (1 | id) + (1 | id:cycle)
logP ~ s(lh_day) + (1 | id) + (1 | id:cycle)
logT ~ s(lh_day) + (1 | id) + (1 | id:cycle)
logFT ~ s(lh_day) + (1 | id) + (1 | id:cycle)
logLH ~ s(lh_day) + (1 | id) + (1 | id:cycle)
Data: biocycle_lh (Number of observations: 2565)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Smoothing Spline Hyperparameters:
Estimate Est.Error l-95% CI u-95% CI Rhat
sds(logE_slh_day_1) 8.98 2.12 5.81 14.10 1.00
sds(logFE_slh_day_1) 10.25 2.59 6.65 16.61 1.00
sds(logP_slh_day_1) 6.59 1.69 4.19 10.79 1.00
sds(logT_slh_day_1) 1.65 0.49 0.96 2.85 1.00
sds(logFT_slh_day_1) 1.64 0.50 0.96 2.87 1.00
sds(logLH_slh_day_1) 11.85 2.94 7.63 18.95 1.00
Bulk_ESS Tail_ESS
sds(logE_slh_day_1) 838 1178
sds(logFE_slh_day_1) 715 1572
sds(logP_slh_day_1) 617 1059
sds(logT_slh_day_1) 929 1370
sds(logFT_slh_day_1) 908 1748
sds(logLH_slh_day_1) 791 1663
Multilevel Hyperparameters:
~id (Number of levels: 214)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(logE_Intercept) 0.18 0.01 0.16 0.21 1.00
sd(logFE_Intercept) 0.18 0.02 0.15 0.21 1.00
sd(logP_Intercept) 0.24 0.03 0.18 0.30 1.00
sd(logT_Intercept) 0.34 0.02 0.31 0.37 1.02
sd(logFT_Intercept) 0.38 0.02 0.34 0.41 1.01
sd(logLH_Intercept) 0.29 0.02 0.25 0.33 1.00
Bulk_ESS Tail_ESS
sd(logE_Intercept) 762 1725
sd(logFE_Intercept) 692 1779
sd(logP_Intercept) 455 754
sd(logT_Intercept) 215 461
sd(logFT_Intercept) 358 760
sd(logLH_Intercept) 1129 2296
~id:cycle (Number of levels: 340)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(logE_Intercept) 0.01 0.01 0.00 0.02 1.02
sd(logFE_Intercept) 0.01 0.01 0.00 0.02 1.01
sd(logP_Intercept) 0.25 0.03 0.20 0.31 1.00
sd(logT_Intercept) 0.01 0.01 0.00 0.02 1.01
sd(logFT_Intercept) 0.03 0.01 0.02 0.04 1.02
sd(logLH_Intercept) 0.06 0.04 0.00 0.13 1.01
Bulk_ESS Tail_ESS
sd(logE_Intercept) 201 566
sd(logFE_Intercept) 177 392
sd(logP_Intercept) 583 1059
sd(logT_Intercept) 282 730
sd(logFT_Intercept) 239 749
sd(logLH_Intercept) 272 671
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
logE_Intercept 4.47 0.02 4.44 4.50 1.00 1400
logFE_Intercept 0.44 0.02 0.41 0.47 1.00 1639
logP_Intercept 7.30 0.03 7.25 7.34 1.00 2047
logT_Intercept 3.30 0.02 3.26 3.34 1.02 178
logFT_Intercept -0.91 0.03 -0.96 -0.85 1.02 143
logLH_Intercept 1.86 0.02 1.81 1.90 1.00 1067
logE_slh_day_1 -1.23 2.14 -5.40 3.03 1.00 787
logFE_slh_day_1 -1.45 2.40 -6.09 3.23 1.00 766
logP_slh_day_1 -12.75 2.64 -18.02 -7.67 1.00 1705
logT_slh_day_1 0.72 0.75 -0.76 2.20 1.00 973
logFT_slh_day_1 0.50 0.82 -1.07 2.13 1.00 905
logLH_slh_day_1 18.52 2.85 13.01 24.03 1.00 1228
Tail_ESS
logE_Intercept 2232
logFE_Intercept 2194
logP_Intercept 2557
logT_Intercept 453
logFT_Intercept 369
logLH_Intercept 1935
logE_slh_day_1 1184
logFE_slh_day_1 1197
logP_slh_day_1 2499
logT_slh_day_1 1858
logFT_slh_day_1 1531
logLH_slh_day_1 2238
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sigma_logE 0.47 0.01 0.46 0.49 1.00 1050
sigma_logFE 0.53 0.01 0.51 0.55 1.00 1027
sigma_logP 0.57 0.01 0.55 0.59 1.00 4517
sigma_logT 0.18 0.00 0.17 0.18 1.00 2035
sigma_logFT 0.20 0.00 0.19 0.20 1.00 1478
sigma_logLH 0.60 0.01 0.58 0.61 1.00 3977
Tail_ESS
sigma_logE 2193
sigma_logFE 2244
sigma_logP 3040
sigma_logT 2475
sigma_logFT 2186
sigma_logLH 3156
Residual Correlations:
Estimate Est.Error l-95% CI u-95% CI Rhat
rescor(logE,logFE) 0.99 0.00 0.99 0.99 1.00
rescor(logE,logP) 0.14 0.02 0.10 0.18 1.00
rescor(logFE,logP) 0.12 0.02 0.08 0.17 1.00
rescor(logE,logT) 0.30 0.02 0.26 0.34 1.00
rescor(logFE,logT) 0.28 0.02 0.24 0.32 1.00
rescor(logP,logT) 0.11 0.02 0.07 0.15 1.00
rescor(logE,logFT) 0.28 0.02 0.24 0.32 1.00
rescor(logFE,logFT) 0.32 0.02 0.29 0.36 1.00
rescor(logP,logFT) 0.06 0.02 0.02 0.11 1.00
rescor(logT,logFT) 0.86 0.01 0.85 0.87 1.00
rescor(logE,logLH) 0.36 0.02 0.32 0.39 1.00
rescor(logFE,logLH) 0.36 0.02 0.32 0.40 1.00
rescor(logP,logLH) 0.01 0.02 -0.03 0.04 1.00
rescor(logT,logLH) 0.27 0.02 0.24 0.31 1.00
rescor(logFT,logLH) 0.28 0.02 0.24 0.31 1.00
Bulk_ESS Tail_ESS
rescor(logE,logFE) 683 1786
rescor(logE,logP) 4468 2932
rescor(logFE,logP) 4463 2971
rescor(logE,logT) 2462 2750
rescor(logFE,logT) 2364 2834
rescor(logP,logT) 2319 2464
rescor(logE,logFT) 2062 2643
rescor(logFE,logFT) 2376 2743
rescor(logP,logFT) 2461 2721
rescor(logT,logFT) 1233 2737
rescor(logE,logLH) 3006 2232
rescor(logFE,logLH) 2945 2256
rescor(logP,logLH) 3790 2828
rescor(logT,logLH) 4379 3194
rescor(logFT,logLH) 3991 2789
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
preds_lh <- predict(mod_mv_lh, newdata = lh_days, re_formula = NA)
lh_days$mv_est_estradiol_lh <- round(preds_lh[, "Estimate", "logE"], 2)
lh_days$mv_est_free_estradiol_lh <- round(preds_lh[, "Estimate", "logFE"], 2)
lh_days$mv_est_progesterone_lh <- round(preds_lh[, "Estimate", "logP"], 2)
lh_days$mv_est_testosterone_lh <- round(preds_lh[, "Estimate", "logT"], 2)
lh_days$mv_est_luteinising_lh <- round(preds_lh[, "Estimate", "logLH"], 2)
lh_days$mv_est_free_testosterone_lh <- round(preds_lh[, "Estimate", "logFT"], 2)
rio::export(lh_days ,"merge_files/lh_days_mv.rds")
rio::export(lh_days ,"merge_files/lh_days_mv.tsv")
rio::export(lh_days ,"merge_files/lh_days_mv.sav")
# Load old files
bc_days_old <- rio::import("merge_files/bc_days.rds")
Warning: Missing `trust` will be set to FALSE by default for RDS in
2.0.0.
fc_days_old <- rio::import("merge_files/fc_days.rds")
Warning: Missing `trust` will be set to FALSE by default for RDS in
2.0.0.
lh_days_old <- rio::import("merge_files/lh_days.rds")
Warning: Missing `trust` will be set to FALSE by default for RDS in
2.0.0.
# Join and correlate
# BC
bc_joined <- left_join(bc_days, bc_days_old, by = "bc_day", suffix = c("_new", "_old"))
cor_bc <- bc_joined %>%
summarise(
cor_e = cor(mv_est_estradiol_bc, est_estradiol_bc, use = "complete.obs"),
cor_fe = cor(mv_est_free_estradiol_bc, est_free_estradiol_bc, use = "complete.obs"),
cor_p = cor(mv_est_progesterone_bc, est_progesterone_bc, use = "complete.obs"),
cor_t = cor(mv_est_testosterone_bc, est_testosterone_bc, use = "complete.obs"),
cor_ft = cor(mv_est_free_testosterone_bc, est_free_testosterone_bc, use = "complete.obs")
)
print("Correlations for BC models:")
[1] "Correlations for BC models:"
print(cor_bc)
cor_e cor_fe cor_p cor_t cor_ft
1 0.9892037 0.9871843 0.9997668 0.9989413 0.9986248
# FC
fc_joined <- left_join(fc_days, fc_days_old, by = "fc_day", suffix = c("_new", "_old"))
cor_fc <- fc_joined %>%
summarise(
cor_e = cor(mv_est_estradiol_fc, est_estradiol_fc, use = "complete.obs"),
cor_fe = cor(mv_est_free_estradiol_fc, est_free_estradiol_fc, use = "complete.obs"),
cor_p = cor(mv_est_progesterone_fc, est_progesterone_fc, use = "complete.obs"),
cor_t = cor(mv_est_testosterone_fc, est_testosterone_fc, use = "complete.obs"),
cor_ft = cor(mv_est_free_testosterone_fc, est_free_testosterone_fc, use = "complete.obs")
)
print("Correlations for FC models:")
[1] "Correlations for FC models:"
print(cor_fc)
cor_e cor_fe cor_p cor_t cor_ft
1 0.9740625 0.9961721 0.9986579 0.9786549 0.9942325
# LH
lh_joined <- left_join(lh_days, lh_days_old, by = "lh_day", suffix = c("_new", "_old"))
cor_lh <- lh_joined %>%
summarise(
cor_e = cor(mv_est_estradiol_lh, est_estradiol_lh, use = "complete.obs"),
cor_fe = cor(mv_est_free_estradiol_lh, est_free_estradiol_lh, use = "complete.obs"),
cor_p = cor(mv_est_progesterone_lh, est_progesterone_lh, use = "complete.obs"),
cor_t = cor(mv_est_testosterone_lh, est_testosterone_lh, use = "complete.obs"),
cor_ft = cor(mv_est_free_testosterone_lh, est_free_testosterone_lh, use = "complete.obs")
)
print("Correlations for LH models:")
[1] "Correlations for LH models:"
print(cor_lh)
# A tibble: 1 × 5
cor_e cor_fe cor_p cor_t cor_ft
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1.000 1.000 1.000 0.999 0.999
This section checks if the univariate imputation (Old) underestimated the correlation between different hormones compared to the multivariate imputation (New).
# Function to get lower triangle of correlation matrix
get_cor_summary <- function(df, pattern) {
# Select columns matching pattern
cols <- df %>% select(matches(pattern))
# Calculate correlation
cormat <- cor(cols, use = "complete.obs")
# Return readable format
return(cormat)
}
print("--- Backward Counting: Inter-hormone correlations ---")
[1] "--- Backward Counting: Inter-hormone correlations ---"
# Old Correlations (Univariate)
print("OLD (Univariate) Inter-hormone Correlations:")
[1] "OLD (Univariate) Inter-hormone Correlations:"
old_vars_bc <- c("est_estradiol_bc", "est_progesterone_bc", "est_testosterone_bc", "est_free_estradiol_bc")
print(cor(bc_joined[, old_vars_bc], use = "complete.obs"))
est_estradiol_bc est_progesterone_bc
est_estradiol_bc 1.0000000 0.6215797
est_progesterone_bc 0.6215797 1.0000000
est_testosterone_bc 0.5675967 0.2021296
est_free_estradiol_bc 0.9994729 0.6025274
est_testosterone_bc est_free_estradiol_bc
est_estradiol_bc 0.5675967 0.9994729
est_progesterone_bc 0.2021296 0.6025274
est_testosterone_bc 1.0000000 0.5634114
est_free_estradiol_bc 0.5634114 1.0000000
# New Correlations (Multivariate)
print("NEW (Multivariate) Inter-hormone Correlations:")
[1] "NEW (Multivariate) Inter-hormone Correlations:"
new_vars_bc <- c("mv_est_estradiol_bc", "mv_est_progesterone_bc", "mv_est_testosterone_bc", "mv_est_free_estradiol_bc")
print(cor(bc_joined[, new_vars_bc], use = "complete.obs"))
mv_est_estradiol_bc mv_est_progesterone_bc
mv_est_estradiol_bc 1.0000000 0.6278980
mv_est_progesterone_bc 0.6278980 1.0000000
mv_est_testosterone_bc 0.6672501 0.1963934
mv_est_free_estradiol_bc 0.9995365 0.6049229
mv_est_testosterone_bc
mv_est_estradiol_bc 0.6672501
mv_est_progesterone_bc 0.1963934
mv_est_testosterone_bc 1.0000000
mv_est_free_estradiol_bc 0.6751168
mv_est_free_estradiol_bc
mv_est_estradiol_bc 0.9995365
mv_est_progesterone_bc 0.6049229
mv_est_testosterone_bc 0.6751168
mv_est_free_estradiol_bc 1.0000000
print("--- Difference (New - Old) ---")
[1] "--- Difference (New - Old) ---"
# Calculate difference matrix (matching names first)
c_old <- cor(bc_joined[, old_vars_bc], use = "complete.obs")
c_new <- cor(bc_joined[, new_vars_bc], use = "complete.obs")
dimnames(c_new) <- dimnames(c_old) # Ensure names match for subtraction
print(round(c_new - c_old, 3))
est_estradiol_bc est_progesterone_bc
est_estradiol_bc 0.000 0.006
est_progesterone_bc 0.006 0.000
est_testosterone_bc 0.100 -0.006
est_free_estradiol_bc 0.000 0.002
est_testosterone_bc est_free_estradiol_bc
est_estradiol_bc 0.100 0.000
est_progesterone_bc -0.006 0.002
est_testosterone_bc 0.000 0.112
est_free_estradiol_bc 0.112 0.000
Based on the comparison between the old univariate imputations and the new multivariate imputations (with residual correlations modeled):