Data Preparation
# Read the comma-separated data file named by the report parameter `file`.
dataset <- read.csv(file = params$file, header = TRUE, sep = ",")
# Session-wide sampling defaults: 8 cores, cmdstanr backend, and refitting of
# models cached via `file =` only when brms detects a change.
options(mc.cores = 8, brms.backend = "cmdstanr", brms.file_refit = "on_change")
#install.packages("loo")
#remotes::install_github("stan-dev/loo")
library(remotes)
library(loo)
library(psych)
library(relativeVariability)
library(brms)
library(cmdstanr)
library(data.table)
library(ggplot2)
library(dplyr)
library(haven)
#library(rstanarm)
library(knitr)
library(rstan)
library(shinystan)
Rescale Data
# Apply the same linear map, (x - 1) * (4 / 6) + 1, to both emotion scores and
# to neuroticism; the map leaves 1 at 1 and compresses everything above it.
for (scale_col in c("negemo_full_m", "posemo_full_m", "neuro_t")) {
  dataset[[scale_col]] <- (dataset[[scale_col]] - 1) * (4 / 6) + 1
}
# Quick visual check of the rescaled negative-emotion scores.
hist(dataset$negemo_full_m)

Censoring Data
# Descriptive statistics of the rescaled variables (missing values dropped).
range(dataset$negemo_full_m, na.rm = TRUE)
## [1] 1 5
range(dataset$posemo_full_m, na.rm = TRUE)
## [1] 1 5
sd(dataset$negemo_full_m, na.rm = TRUE)
## [1] 0.6627719
mean(dataset$negemo_full_m, na.rm = TRUE)
## [1] 1.632069
sd(dataset$posemo_full_m, na.rm = TRUE)
## [1] 0.8617522
mean(dataset$posemo_full_m, na.rm = TRUE)
## [1] 3.496952
sd(dataset$neuro_t, na.rm = TRUE)
## [1] 0.703259
mean(dataset$neuro_t, na.rm = TRUE)
## [1] 3.164141
# Use the full column names: the truncated `negemo_full_` / `posemo_full_`
# only resolved through `$` partial matching and would silently become NULL
# as soon as a second column shared that prefix.
qplot(dataset$negemo_full_m, binwidth = .1)
## Warning: Removed 1098 rows containing non-finite values (`stat_bin()`).

qplot(dataset$posemo_full_m, binwidth = .1)
## Warning: Removed 1098 rows containing non-finite values (`stat_bin()`).

# Censoring flag for negative emotion: a score of exactly 1 is "left", exactly
# 5 is "right", and every other row (including missing scores) is "none".
dataset$Acens <- with(
  dataset,
  case_when(
    negemo_full_m == 1 ~ "left",
    negemo_full_m == 5 ~ "right",
    TRUE ~ "none"
  )
)
table(dataset$Acens)
##
## left none right
## 1694 8103 3
# Same flagging rule applied to the positive-emotion score.
dataset$Acens_p <- with(
  dataset,
  case_when(
    posemo_full_m == 1 ~ "left",
    posemo_full_m == 5 ~ "right",
    TRUE ~ "none"
  )
)
table(dataset$Acens_p)
##
## left none right
## 26 9326 448
BCLSM Negative Emotion
# Censored model for negative emotion in which both the mean and sigma depend
# on neuro_t and carry a person-level random intercept. The fit is cached
# under the path passed as `file`.
Kn_model_neuro3 <- brm(
  bf(
    negemo_full_m | cens(Acens) ~ neuro_t + (1 | person_id),
    sigma ~ neuro_t + (1 | person_id)
  ),
  data = dataset,
  chains = 4, iter = 7000, warmup = 2000,
  init = 0.1,
  control = list(adapt_delta = .99),
  file = paste("models/", params$file, "Kn_model_neuro3")
)
## Warning: Rows containing NAs were excluded from the model.
print(Kn_model_neuro3)
## Family: gaussian
## Links: mu = identity; sigma = log
## Formula: negemo_full_m | cens(Acens) ~ neuro_t + (1 | person_id)
## sigma ~ neuro_t + (1 | person_id)
## Data: dataset (Number of observations: 8618)
## Draws: 4 chains, each with iter = 7000; warmup = 2000; thin = 1;
## total post-warmup draws = 20000
##
## Group-Level Effects:
## ~person_id (Number of levels: 99)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.46 0.04 0.40 0.54 1.00 1776 3747
## sd(sigma_Intercept) 0.36 0.03 0.31 0.42 1.00 2450 5132
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.84 0.22 0.42 1.26 1.00 1117 2033
## sigma_Intercept -0.81 0.17 -1.15 -0.45 1.00 1396 2515
## neuro_t 0.22 0.07 0.09 0.35 1.00 1128 1871
## sigma_neuro_t 0.10 0.05 -0.01 0.20 1.00 1374 2575
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(Kn_model_neuro3)


pp_check(Kn_model_neuro3)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
## Warning: Censored responses are not shown in 'pp_check'.

prior_summary(Kn_model_neuro3)
## prior class coef group resp dpar nlpar lb ub source
## (flat) b default
## (flat) b neuro_t (vectorized)
## (flat) b sigma default
## (flat) b neuro_t sigma (vectorized)
## student_t(3, 1.4, 2.5) Intercept default
## student_t(3, 0, 2.5) Intercept sigma default
## student_t(3, 0, 2.5) sd 0 default
## student_t(3, 0, 2.5) sd sigma 0 default
## student_t(3, 0, 2.5) sd person_id 0 (vectorized)
## student_t(3, 0, 2.5) sd Intercept person_id 0 (vectorized)
## student_t(3, 0, 2.5) sd person_id sigma 0 (vectorized)
## student_t(3, 0, 2.5) sd Intercept person_id sigma 0 (vectorized)
Model comparison
scale vs. no scale parameter
# Censored negative-emotion model with a single constant sigma, used as the
# baseline for the "scale vs. no scale parameter" comparison.
# `init` replaces the deprecated `inits` argument (same value, no warning).
Kn_model_neuro2 <- brm(negemo_full_m | cens(Acens) ~ neuro_t + (1|person_id), data = dataset,
                       iter = 6000, warmup = 2000, chains = 4,
                       control = list(adapt_delta = .98), init = 0.1,
                       file = paste("models/", params$file, "Kn_model_neuro2"))
## Warning: Argument 'inits' is deprecated. Please use argument 'init' instead.
## Warning: Rows containing NAs were excluded from the model.
print(Kn_model_neuro2)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: negemo_full_m | cens(Acens) ~ neuro_t + (1 | person_id)
## Data: dataset (Number of observations: 8618)
## Draws: 4 chains, each with iter = 6000; warmup = 2000; thin = 1;
## total post-warmup draws = 16000
##
## Group-Level Effects:
## ~person_id (Number of levels: 99)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.45 0.03 0.39 0.52 1.00 1247 2013
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.81 0.21 0.39 1.23 1.00 890 1401
## neuro_t 0.23 0.07 0.10 0.36 1.00 865 1403
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.65 0.01 0.64 0.66 1.00 15574 10913
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# LOO comparison of the constant-sigma model (A) and the location-scale
# model (B) for negative emotion; the table is exported to xlsx and printed.
library("writexl")
modelA <- Kn_model_neuro2
modelB <- Kn_model_neuro3
modelA <- add_criterion(modelA, "loo")
modelB <- add_criterion(modelB, "loo")
# Turn the comparison matrix into a data frame whose first column holds the
# model labels, then tag it with the data set name.
loo <- tibble::rownames_to_column(
  as.data.frame(loo_compare(modelA, modelB, criterion = "loo")),
  "model"
)
loo$Dataset <- params$file
write_xlsx(loo, paste0("loo", params$file, ".xlsx"))
kable(loo)
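| model  | elpd_diff | se_diff  | elpd_loo  | se_elpd_loo | p_loo    | se_p_loo  | looic    | se_looic | Dataset               |
|--------|-----------|----------|-----------|-------------|----------|-----------|----------|----------|-----------------------|
| modelB | 0.000     | 0.00000  | -7508.931 | 104.63868   | 297.5132 | 25.844899 | 15017.86 | 209.2774 | Dataset 10 public.csv |
| modelA | -661.357  | 67.77749 | -8170.288 | 95.28992    | 100.6525 | 2.483332  | 16340.58 | 190.5798 | Dataset 10 public.csv |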
censoring vs. no censoring
# Location-scale model for negative emotion WITHOUT the censoring term; the
# counterpart of Kn_model_neuro3 for the "censoring vs. no censoring" check.
Kn_model_neuro4 <- brm(
  bf(
    negemo_full_m ~ neuro_t + (1 | person_id),
    sigma ~ neuro_t + (1 | person_id)
  ),
  data = dataset,
  chains = 4, iter = 7000, warmup = 2000,
  init = 0,
  control = list(adapt_delta = .9999),
  file = paste("models/", params$file, "Kn_model_neuro4")
)
## Warning: Rows containing NAs were excluded from the model.
print(Kn_model_neuro4)
## Family: gaussian
## Links: mu = identity; sigma = log
## Formula: negemo_full_m ~ neuro_t + (1 | person_id)
## sigma ~ neuro_t + (1 | person_id)
## Data: dataset (Number of observations: 8618)
## Draws: 4 chains, each with iter = 7000; warmup = 2000; thin = 1;
## total post-warmup draws = 20000
##
## Group-Level Effects:
## ~person_id (Number of levels: 99)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.35 0.03 0.30 0.41 1.00 1943 4216
## sd(sigma_Intercept) 0.37 0.03 0.32 0.43 1.00 2402 3939
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1.09 0.17 0.76 1.44 1.00 938 2102
## sigma_Intercept -1.37 0.18 -1.71 -1.00 1.00 1426 2669
## neuro_t 0.17 0.05 0.07 0.27 1.00 1039 2265
## sigma_neuro_t 0.21 0.06 0.10 0.32 1.00 1414 2523
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Format the posterior summary of `parameter` in `model` as
# "estimate error [lower;upper]", each number rounded to two decimals.
# Returns one string per row that posterior_summary() reports.
extract_param <- function(model, parameter) {
  stats <- posterior_summary(model, variable = parameter)
  sprintf(
    "%.2f %.2f [%.2f;%.2f]",
    stats[, "Estimate"], stats[, "Est.Error"],
    stats[, "Q2.5"], stats[, "Q97.5"]
  )
}
# Two-row summary table (censored vs. uncensored model) with three columns per
# emotion; only the negative-emotion columns are filled in here.
results_Cens <- setNames(
  data.frame(matrix(nrow = 2, ncol = 6 + 1)),
  c("model",
    "negemo_b_neuro", "negemo_b_neuro_sigma", "negemo_sigma",
    "posemo_b_neuro", "posemo_b_neuro_sigma", "posemo_sigma")
)
results_Cens$model <- c("modelCensoring", "modelnoCensoring")
#NA
# Row 1: censored model; row 2: model without censoring.
results_Cens[1, c("negemo_b_neuro", "negemo_b_neuro_sigma", "negemo_sigma")] <- c(
  extract_param(Kn_model_neuro3, "b_neuro_t"),
  extract_param(Kn_model_neuro3, "b_sigma_neuro_t"),
  extract_param(Kn_model_neuro3, "b_sigma_Intercept")
)
results_Cens[2, c("negemo_b_neuro", "negemo_b_neuro_sigma", "negemo_sigma")] <- c(
  extract_param(Kn_model_neuro4, "b_neuro_t"),
  extract_param(Kn_model_neuro4, "b_sigma_neuro_t"),
  extract_param(Kn_model_neuro4, "b_sigma_Intercept")
)
BCLSM vs. model C (two-part model)
# Cut neuro_t into four quantile groups (computed on the distinct
# person_id / neuro_t pairs) and attach the group to every row as `neuro_Q`.
dataset <- dataset %>%
  left_join(
    dataset %>%
      distinct(person_id, neuro_t) %>%
      mutate(neuro_Q = Hmisc::cut2(neuro_t, g = 4)),
    by = c("person_id", "neuro_t")
  )
# Censored location-scale model whose person-level intercept SD for the mean
# is estimated separately within each neuro_Q group.
Kn_model_neuro_jinxed <- brm(
  bf(
    negemo_full_m | cens(Acens) ~ neuro_t + (1 | gr(person_id, by = neuro_Q)),
    sigma ~ neuro_t + (1 | person_id)
  ),
  data = dataset,
  chains = 4, iter = 5000, warmup = 2000,
  init = 0.1,
  control = list(adapt_delta = .99),
  file = paste("models/", params$file, "Kn_model_neuro_jinxed")
)
## Warning: Rows containing NAs were excluded from the model.
print(Kn_model_neuro_jinxed)
## Family: gaussian
## Links: mu = identity; sigma = log
## Formula: negemo_full_m | cens(Acens) ~ neuro_t + (1 | gr(person_id, by = neuro_Q))
## sigma ~ neuro_t + (1 | person_id)
## Data: dataset (Number of observations: 8618)
## Draws: 4 chains, each with iter = 5000; warmup = 2000; thin = 1;
## total post-warmup draws = 12000
##
## Group-Level Effects:
## ~person_id (Number of levels: 99)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept:neuro_Q[1.25,2.75)) 0.43 0.07 0.32 0.59 1.00 2419 4281
## sd(Intercept:neuro_Q[2.75,3.38)) 0.52 0.08 0.40 0.70 1.00 1755 2729
## sd(Intercept:neuro_Q[3.38,3.88)) 0.43 0.07 0.31 0.59 1.00 1997 4408
## sd(Intercept:neuro_Q[3.88,5.00]) 0.55 0.10 0.40 0.79 1.00 2107 4395
## sd(sigma_Intercept) 0.36 0.03 0.31 0.42 1.00 2086 3725
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.84 0.22 0.41 1.29 1.00 1175 2210
## sigma_Intercept -0.80 0.18 -1.15 -0.45 1.00 1628 3080
## neuro_t 0.21 0.07 0.07 0.35 1.00 1131 2246
## sigma_neuro_t 0.10 0.05 -0.01 0.20 1.00 1609 2829
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# LOO comparison of the location-scale model (B) against the model with
# group-specific intercept SDs (C) for negative emotion.
library("writexl")
modelB <- Kn_model_neuro3
modelC <- Kn_model_neuro_jinxed
modelB <- add_criterion(modelB, "loo")
modelC <- add_criterion(modelC, "loo")
# Comparison matrix -> data frame with the model labels in the first column,
# followed by the data set tag.
loo_c <- tibble::rownames_to_column(
  as.data.frame(loo_compare(modelB, modelC, criterion = "loo")),
  "model"
)
loo_c$Dataset <- params$file
write_xlsx(loo_c, paste0("loo_c", params$file, ".xlsx"))
kable(loo_c)
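| model  | elpd_diff | se_diff  | elpd_loo  | se_elpd_loo | p_loo    | se_p_loo | looic    | se_looic | Dataset               |
|--------|-----------|----------|-----------|-------------|----------|----------|----------|----------|-----------------------|
| modelC | 0.000000  | 0.000000 | -7507.334 | 104.2691    | 295.8518 | 25.29199 | 15014.67 | 208.5381 | Dataset 10 public.csv |
| modelB | -1.596982 | 1.521018 | -7508.931 | 104.6387    | 297.5132 | 25.84490 | 15017.86 | 209.2774 | Dataset 10 public.csv |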
control for gender
# Treat gender as a categorical predictor.
dataset$gender <- as.factor(dataset$gender)
# Censored negative-emotion model with gender added to both the mean and the
# sigma part; sigma has no person-level random intercept in this model.
# `init` replaces the deprecated `inits` argument (same value, no warning).
Kn_model_sex <- brm(bf(negemo_full_m | cens(Acens) ~ neuro_t + gender + (1|person_id),
                       sigma ~ neuro_t + gender), data = dataset,
                    iter = 9000, warmup = 2000, chains = 8,
                    control = list(adapt_delta = .99), init = 0.1,
                    file = paste("models/", params$file, "Kn_model_sex"))
## Warning: Argument 'inits' is deprecated. Please use argument 'init' instead.
## Warning: Rows containing NAs were excluded from the model.
print(Kn_model_sex)
## Family: gaussian
## Links: mu = identity; sigma = log
## Formula: negemo_full_m | cens(Acens) ~ neuro_t + gender + (1 | person_id)
## sigma ~ neuro_t + gender
## Data: dataset (Number of observations: 8618)
## Draws: 8 chains, each with iter = 9000; warmup = 2000; thin = 1;
## total post-warmup draws = 56000
##
## Group-Level Effects:
## ~person_id (Number of levels: 99)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.45 0.03 0.39 0.52 1.00 4407 8461
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.78 0.24 0.32 1.25 1.00 2610 5247
## sigma_Intercept -0.66 0.05 -0.75 -0.57 1.00 32216 36423
## neuro_t 0.24 0.07 0.10 0.37 1.00 2645 5395
## gender1 0.07 0.12 -0.16 0.30 1.00 2644 5305
## sigma_neuro_t 0.07 0.01 0.04 0.10 1.00 34828 37981
## sigma_gender1 -0.01 0.02 -0.06 0.03 1.00 36045 39960
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(Kn_model_sex)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
## Warning: Censored responses are not shown in 'pp_check'.

plot(Kn_model_sex)


BCLSM Positive Emotion
# Censored location-scale model for positive emotion: mean and sigma both
# depend on neuro_t and carry a person-level random intercept.
# `init` replaces the deprecated `inits` argument (same value, no warning).
Kp_model_neuro3 <- brm(bf(posemo_full_m | cens(Acens_p) ~ neuro_t + (1|person_id),
                          sigma ~ neuro_t + (1|person_id)), data = dataset,
                       chains = 4,
                       control = list(adapt_delta = .95), init = 0.1,
                       iter = 7000, warmup = 2000,
                       file = paste("models/", params$file, "Kp_model_neuro3"))
## Warning: Argument 'inits' is deprecated. Please use argument 'init' instead.
## Warning: Rows containing NAs were excluded from the model.
print(Kp_model_neuro3)
## Family: gaussian
## Links: mu = identity; sigma = log
## Formula: posemo_full_m | cens(Acens_p) ~ neuro_t + (1 | person_id)
## sigma ~ neuro_t + (1 | person_id)
## Data: dataset (Number of observations: 8618)
## Draws: 4 chains, each with iter = 7000; warmup = 2000; thin = 1;
## total post-warmup draws = 20000
##
## Group-Level Effects:
## ~person_id (Number of levels: 99)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.46 0.04 0.39 0.53 1.00 2198 3998
## sd(sigma_Intercept) 0.32 0.02 0.28 0.37 1.00 3274 6776
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 4.05 0.22 3.62 4.48 1.00 1281 3025
## sigma_Intercept -0.34 0.15 -0.63 -0.05 1.00 2123 4565
## neuro_t -0.17 0.07 -0.30 -0.03 1.00 1246 2995
## sigma_neuro_t 0.01 0.05 -0.08 0.10 1.00 2096 4415
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(Kp_model_neuro3)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
## Warning: Censored responses are not shown in 'pp_check'.

plot(Kp_model_neuro3)


prior_summary(Kp_model_neuro3)
## prior class coef group resp dpar nlpar lb ub source
## (flat) b default
## (flat) b neuro_t (vectorized)
## (flat) b sigma default
## (flat) b neuro_t sigma (vectorized)
## student_t(3, 3.5, 2.5) Intercept default
## student_t(3, 0, 2.5) Intercept sigma default
## student_t(3, 0, 2.5) sd 0 default
## student_t(3, 0, 2.5) sd sigma 0 default
## student_t(3, 0, 2.5) sd person_id 0 (vectorized)
## student_t(3, 0, 2.5) sd Intercept person_id 0 (vectorized)
## student_t(3, 0, 2.5) sd person_id sigma 0 (vectorized)
## student_t(3, 0, 2.5) sd Intercept person_id sigma 0 (vectorized)
Model comparison
scale vs. no scale parameter
# Censored positive-emotion model with a single constant sigma, used as the
# baseline for the "scale vs. no scale parameter" comparison.
# `init` replaces the deprecated `inits` argument (same value, no warning).
Kp_model_neuro2 <- brm(posemo_full_m | cens(Acens_p) ~ neuro_t + (1|person_id), data = dataset,
                       iter = 7000, warmup = 2000, chains = 4,
                       control = list(adapt_delta = .95), init = 0.1,
                       file = paste("models/", params$file, "Kp_model_neuro2"))
## Warning: Argument 'inits' is deprecated. Please use argument 'init' instead.
## Warning: Rows containing NAs were excluded from the model.
print(Kp_model_neuro2)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: posemo_full_m | cens(Acens_p) ~ neuro_t + (1 | person_id)
## Data: dataset (Number of observations: 8618)
## Draws: 4 chains, each with iter = 7000; warmup = 2000; thin = 1;
## total post-warmup draws = 20000
##
## Group-Level Effects:
## ~person_id (Number of levels: 99)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.45 0.03 0.39 0.52 1.00 2142 4405
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 4.02 0.21 3.60 4.43 1.00 1398 2654
## neuro_t -0.16 0.07 -0.29 -0.03 1.00 1405 2690
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.78 0.01 0.77 0.79 1.00 31402 15408
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# LOO comparison of the constant-sigma model (Ap) and the location-scale
# model (Bp) for positive emotion; exported to xlsx and printed.
library("writexl")
modelAp <- Kp_model_neuro2
modelBp <- Kp_model_neuro3
modelAp <- add_criterion(modelAp, "loo")
modelBp <- add_criterion(modelBp, "loo")
# Comparison matrix -> data frame with the model labels in the first column,
# followed by the data set tag.
looP <- tibble::rownames_to_column(
  as.data.frame(loo_compare(modelAp, modelBp, criterion = "loo")),
  "model"
)
looP$Dataset <- params$file
write_xlsx(looP, paste0("looP", params$file, ".xlsx"))
kable(looP)
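| model   | elpd_diff | se_diff  | elpd_loo   | se_elpd_loo | p_loo     | se_p_loo | looic    | se_looic | Dataset               |
|---------|-----------|----------|------------|-------------|-----------|----------|----------|----------|-----------------------|
| modelBp | 0.0000    | 0.00000  | -9512.602  | 75.33636    | 203.65033 | 5.991610 | 19025.20 | 150.6727 | Dataset 10 public.csv |
| modelAp | -657.6389 | 39.24459 | -10170.241 | 76.43671    | 97.78641  | 1.740907 | 20340.48 | 152.8734 | Dataset 10 public.csv |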
censoring vs. no censoring
# Location-scale model for positive emotion WITHOUT the censoring term; the
# counterpart of Kp_model_neuro3 for the "censoring vs. no censoring" check.
# `init` replaces the deprecated `inits` argument (same value, no warning).
Kp_model_neuro4 <- brm(bf(posemo_full_m ~ neuro_t + (1|person_id),
                          sigma ~ neuro_t + (1|person_id)), data = dataset,
                       chains = 4,
                       control = list(adapt_delta = .9999), init = 0,
                       iter = 7000, warmup = 2000,
                       file = paste("models/", params$file, "Kp_model_neuro4"))
## Warning: Argument 'inits' is deprecated. Please use argument 'init' instead.
## Warning: Rows containing NAs were excluded from the model.
print(Kp_model_neuro4)
## Family: gaussian
## Links: mu = identity; sigma = log
## Formula: posemo_full_m ~ neuro_t + (1 | person_id)
## sigma ~ neuro_t + (1 | person_id)
## Data: dataset (Number of observations: 8618)
## Draws: 4 chains, each with iter = 7000; warmup = 2000; thin = 1;
## total post-warmup draws = 20000
##
## Group-Level Effects:
## ~person_id (Number of levels: 99)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.42 0.03 0.37 0.49 1.00 1492 3332
## sd(sigma_Intercept) 0.27 0.02 0.23 0.31 1.00 2685 5608
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.95 0.20 3.56 4.35 1.00 1086 2017
## sigma_Intercept -0.46 0.13 -0.71 -0.21 1.00 1302 3197
## neuro_t -0.15 0.06 -0.27 -0.03 1.00 1063 1804
## sigma_neuro_t 0.04 0.04 -0.04 0.11 1.00 1248 3351
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#pa
# Fill the positive-emotion columns of the censoring table:
# row 1 = censored model, row 2 = model without censoring.
results_Cens[1, c("posemo_b_neuro", "posemo_b_neuro_sigma", "posemo_sigma")] <- c(
  extract_param(Kp_model_neuro3, "b_neuro_t"),
  extract_param(Kp_model_neuro3, "b_sigma_neuro_t"),
  extract_param(Kp_model_neuro3, "b_sigma_Intercept")
)
results_Cens[2, c("posemo_b_neuro", "posemo_b_neuro_sigma", "posemo_sigma")] <- c(
  extract_param(Kp_model_neuro4, "b_neuro_t"),
  extract_param(Kp_model_neuro4, "b_sigma_neuro_t"),
  extract_param(Kp_model_neuro4, "b_sigma_Intercept")
)
BCLSM vs. model C (two-part model)
# Censored location-scale model for positive emotion whose person-level
# intercept SD for the mean is estimated separately within each neuro_Q group.
# The censoring flags must be the positive-emotion ones (Acens_p): Acens is
# derived from negemo_full_m and marks the wrong rows as censored for this
# response. The other positive-emotion models in this script use Acens_p.
Kp_model_neuro_jinxed <- brm(bf(posemo_full_m | cens(Acens_p) ~ neuro_t + (1|gr(person_id, by = neuro_Q)),
                                sigma ~ neuro_t + (1|person_id)), data = dataset,
                             iter = 5000, warmup = 2000, chains = 4,
                             control = list(adapt_delta = .99), init = 0.1,
                             file = paste("models/", params$file, "Kp_model_neuro_jinxed"))
## Warning: Rows containing NAs were excluded from the model.
print(Kp_model_neuro_jinxed)
## Family: gaussian
## Links: mu = identity; sigma = log
## Formula: posemo_full_m | cens(Acens) ~ neuro_t + (1 | gr(person_id, by = neuro_Q))
## sigma ~ neuro_t + (1 | person_id)
## Data: dataset (Number of observations: 8618)
## Draws: 4 chains, each with iter = 5000; warmup = 2000; thin = 1;
## total post-warmup draws = 12000
##
## Group-Level Effects:
## ~person_id (Number of levels: 99)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept:neuro_Q[1.25,2.75)) 0.42 0.07 0.31 0.58 1.00 1949 3507
## sd(Intercept:neuro_Q[2.75,3.38)) 0.40 0.06 0.30 0.53 1.00 1674 3310
## sd(Intercept:neuro_Q[3.38,3.88)) 0.42 0.07 0.31 0.57 1.00 1457 3494
## sd(Intercept:neuro_Q[3.88,5.00]) 0.45 0.09 0.31 0.65 1.00 1539 2718
## sd(sigma_Intercept) 0.27 0.02 0.23 0.32 1.00 1918 4422
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.42 0.21 3.00 3.83 1.00 996 1928
## sigma_Intercept -0.42 0.13 -0.68 -0.16 1.00 1618 3101
## neuro_t -0.05 0.07 -0.18 0.09 1.00 919 1739
## sigma_neuro_t 0.02 0.04 -0.06 0.10 1.00 1577 2856
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# LOO comparison of the location-scale model (B) against the model with
# group-specific intercept SDs (C) for positive emotion.
modelB <- Kp_model_neuro3
modelC <- Kp_model_neuro_jinxed
modelB <- add_criterion(modelB, "loo")
modelC <- add_criterion(modelC, "loo")
loo_cP <- loo_compare(modelB,modelC, criterion = "loo")
## Warning: Not all models have the same y variable. ('yhash' attributes do not match)
loo_cP <- as.data.frame(loo_cP)
loo_cP$Dataset <- params$file
# Move the model labels from the row names into a "model" column, as the other
# comparison tables in this script do, so the exported sheet carries them.
loo_cP <- tibble::rownames_to_column(loo_cP, "model")
library("writexl")
write_xlsx(loo_cP,paste0("loo_cP", params$file, ".xlsx"))
kable(loo_cP)
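|        | elpd_diff | se_diff  | elpd_loo  | se_elpd_loo | p_loo    | se_p_loo | looic    | se_looic | Dataset               |
|--------|-----------|----------|-----------|-------------|----------|----------|----------|----------|-----------------------|
| modelC | 0.000     | 0.00000  | -7803.617 | 74.08185    | 186.3346 | 5.640617 | 15607.23 | 148.1637 | Dataset 10 public.csv |
| modelB | -1708.985 | 52.62172 | -9512.602 | 75.33636    | 203.6503 | 5.991610 | 19025.20 | 150.6727 | Dataset 10 public.csv |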
# Format the posterior summary of `parameter` in `model` as
# "estimate error [lower;upper]", each number rounded to two decimals.
# (Same body as the definition of this name earlier in the script.)
extract_param <- function(model, parameter) {
  stats <- posterior_summary(model, variable = parameter)
  sprintf(
    "%.2f %.2f [%.2f;%.2f]",
    stats[, "Estimate"], stats[, "Est.Error"],
    stats[, "Q2.5"], stats[, "Q97.5"]
  )
}
# Overall results table: one row per analysis, four columns per emotion.
results_K <- setNames(
  data.frame(matrix(nrow = 7, ncol = 8 + 1)),
  c("model",
    "negemo_b_neuro", "negemo_b_neuro_sigma", "negemo_sigma", "b_neg_sigma_sex",
    "posemo_b_neuro", "posemo_b_neuro_sigma", "posemo_sigma", "b_pos_sigma_sex")
)
results_K$model <- c("model1", "model2", "model3",
                     "RSD", "RSD_weight", "SD", "gender")
#NA
# Row 2: constant-sigma model; row 3: location-scale model.
results_K[2, c("negemo_b_neuro", "negemo_sigma")] <- c(
  extract_param(Kn_model_neuro2, "b_neuro_t"),
  extract_param(Kn_model_neuro2, "sigma")
)
results_K[3, c("negemo_b_neuro", "negemo_b_neuro_sigma", "negemo_sigma")] <- c(
  extract_param(Kn_model_neuro3, "b_neuro_t"),
  extract_param(Kn_model_neuro3, "b_sigma_neuro_t"),
  extract_param(Kn_model_neuro3, "b_sigma_Intercept")
)
#gender
results_K[7, c("negemo_b_neuro", "negemo_b_neuro_sigma",
               "negemo_sigma", "b_neg_sigma_sex")] <- c(
  extract_param(Kn_model_sex, "b_neuro_t"),
  extract_param(Kn_model_sex, "b_sigma_neuro_t"),
  extract_param(Kn_model_sex, "b_sigma_Intercept"),
  extract_param(Kn_model_sex, "b_sigma_gender1")
)
#pa
results_K[2, c("posemo_b_neuro", "posemo_sigma")] <- c(
  extract_param(Kp_model_neuro2, "b_neuro_t"),
  extract_param(Kp_model_neuro2, "sigma")
)
results_K[3, c("posemo_b_neuro", "posemo_b_neuro_sigma", "posemo_sigma")] <- c(
  extract_param(Kp_model_neuro3, "b_neuro_t"),
  extract_param(Kp_model_neuro3, "b_sigma_neuro_t"),
  extract_param(Kp_model_neuro3, "b_sigma_Intercept")
)
RVI (Relative Variability Index)
# Person-level table: the distinct rows of columns 2 to 5 of `dataset`.
# NOTE(review): the columns are picked by position; presumably they include
# person_id and neuro_t (both are read from data_w further down) — confirm.
data_w <- unique(dataset[,2:5])
Unweighted RVI
# Relative SD of negative emotion for each row of data_w, computed with
# relativeSD() using 1 and 5 as the scale bounds.
data_w$RSD_NA <- NA
# seq_len() keeps the loop from running when data_w has no rows.
for (i in seq_len(nrow(data_w))) {
  data_w$RSD_NA[i] <- relativeSD(dataset$negemo_full_m[dataset$person_id == data_w$person_id[i]],
                                 1, 5)
}
range(data_w$RSD_NA, na.rm = TRUE)
## [1] 0.1803172 0.6717369
mean(data_w$RSD_NA, na.rm = TRUE)
## [1] 0.3923167
sd(data_w$RSD_NA, na.rm = TRUE)
## [1] 0.09937926
# Regress the log relative SD on neuro_t.
data_w$logrsd_n <- log(data_w$RSD_NA)
#plot(data_w$logrsd_n)
m_rvi_na <- brm(logrsd_n ~ neuro_t, data= data_w,
                file = paste("models/", params$file, "Kn_model_logrsd_uw"))
print(m_rvi_na)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: logrsd_n ~ neuro_t
## Data: data_w (Number of observations: 99)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.01 0.12 -1.24 -0.78 1.00 4305 2871
## neuro_t 0.01 0.04 -0.06 0.08 1.00 4319 2641
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.26 0.02 0.23 0.30 1.00 3815 2805
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Store the neuro_t effect on the log relative SD of negative emotion in the
# "RSD" row; the column is addressed by name instead of by position.
results_K[4, "negemo_b_neuro_sigma"] <- extract_param(m_rvi_na, "b_neuro_t")
# Relative SD of positive emotion for each row of data_w (bounds 1 and 5).
data_w$RSD_PA <- NA
# seq_len() keeps the loop from running when data_w has no rows.
for (i in seq_len(nrow(data_w))) {
  data_w$RSD_PA[i] <- relativeSD(dataset$posemo_full_m[dataset$person_id == data_w$person_id[i]],
                                 1, 5)
}
range(data_w$RSD_PA)
## [1] 0.1818346 0.6488575
# Regress the log relative SD on neuro_t.
data_w$logrsd_p <- log(data_w$RSD_PA)
m_rvi_pa <- brm(logrsd_p ~ neuro_t, data= data_w,
                file = paste("models/", params$file, "Kp_model_logrsd_uw"))
print(m_rvi_pa)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: logrsd_p ~ neuro_t
## Data: data_w (Number of observations: 99)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.05 0.13 -1.31 -0.79 1.00 3866 2970
## neuro_t 0.02 0.04 -0.06 0.10 1.00 3736 2914
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.29 0.02 0.25 0.34 1.00 3710 2812
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Column 6 of results_K is "posemo_b_neuro", but the matching negative-emotion
# value is stored in "negemo_b_neuro_sigma"; write to the parallel
# positive-emotion column, addressed by name.
results_K[4, "posemo_b_neuro_sigma"] <- extract_param(m_rvi_pa, "b_neuro_t")
Weighted RVI
# Mean negative emotion for each row of data_w, ignoring missing scores.
data_w$mean_NA <- NA
# seq_len() keeps the loops from running when data_w has no rows.
for (i in seq_len(nrow(data_w))) {
  data_w$mean_NA[i] <- mean(dataset$negemo_full_m[dataset$person_id == data_w$person_id[i]],
                            na.rm = TRUE)
}
mean(data_w$mean_NA)
## [1] 1.639792
sd(data_w$mean_NA)
## [1] 0.3689099
# Mean positive emotion for each row of data_w, ignoring missing scores.
data_w$mean_PA <- NA
for (i in seq_len(nrow(data_w))) {
  data_w$mean_PA[i] <- mean(dataset$posemo_full_m[dataset$person_id == data_w$person_id[i]],
                            na.rm = TRUE)
}
mean(data_w$mean_PA)
## [1] 3.49474
sd(data_w$mean_PA)
## [1] 0.4432328
# Weight for each row of data_w: squared maximumSD() given the person's mean,
# the scale bounds and the number of non-missing negative-emotion scores.
# Rows with a missing mean keep an NA weight.
data_w$weight_NA <- NA
# seq_len() keeps the loop from running when data_w has no rows.
for (i in seq_len(nrow(data_w))) {
  if (!is.na(data_w$mean_NA[i])) {
    data_w$weight_NA[i] <- maximumSD(data_w$mean_NA[i], # mean
                                     1,  # minimum
                                     5,  # maximum
                                     sum(!is.na(dataset$negemo_full_m[dataset$person_id == data_w$person_id[i]]))
    )
    # W as reported in paper
    data_w$weight_NA[i] <- data_w$weight_NA[i]^2
  }
}
mean(data_w$weight_NA)
## [1] 2.007478
sd(data_w$weight_NA)
## [1] 0.8878574
range(data_w$weight_NA)
## [1] 0.02477827 4.03320278
# Weighted regression of the log relative SD on neuro_t.
m_rvi_na_w <- brm(logrsd_n| weights(weight_NA) ~ neuro_t, data= data_w,
                  file = paste("models/", params$file, "Kn_model_logrsd"))
## Warning: Rows containing NAs were excluded from the model.
print(m_rvi_na_w)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: logrsd_n | weights(weight_NA) ~ neuro_t
## Data: data_w (Number of observations: 99)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.11 0.09 -1.28 -0.93 1.00 4001 3222
## neuro_t 0.04 0.03 -0.02 0.09 1.00 4351 3192
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.25 0.01 0.23 0.28 1.00 4136 2989
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Store the weighted negative-emotion estimate in the "RSD_weight" row; the
# column is addressed by name instead of by position.
results_K[5, "negemo_b_neuro_sigma"] <- extract_param(m_rvi_na_w, "b_neuro_t")
# Weight for each row of data_w: squared maximumSD() given the person's mean,
# the scale bounds and the number of non-missing positive-emotion scores.
# Rows with a missing mean keep an NA weight.
data_w$weight_PA <- NA
# seq_len() keeps the loop from running when data_w has no rows.
for (i in seq_len(nrow(data_w))) {
  if (!is.na(data_w$mean_PA[i])) {
    data_w$weight_PA[i] <- maximumSD(data_w$mean_PA[i], # mean
                                     1,  # minimum
                                     5,  # maximum
                                     sum(!is.na(dataset$posemo_full_m[dataset$person_id == data_w$person_id[i]]))
    )
    # W as reported in paper
    data_w$weight_PA[i] <- data_w$weight_PA[i]^2
  }
}
# Weighted regression of the log relative SD on neuro_t.
m_rvi_pa_w <- brm(logrsd_p| weights(weight_PA) ~ neuro_t, data= data_w,
                  file = paste("models/", params$file, "Kp_model_logrsd"))
## Warning: Rows containing NAs were excluded from the model.
print(m_rvi_pa_w)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: logrsd_p | weights(weight_PA) ~ neuro_t
## Data: data_w (Number of observations: 99)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.08 0.07 -1.22 -0.94 1.00 4418 2969
## neuro_t 0.03 0.02 -0.02 0.07 1.00 4423 2851
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.28 0.01 0.26 0.30 1.00 3958 2945
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Column 6 of results_K is "posemo_b_neuro", but the matching negative-emotion
# value is stored in "negemo_b_neuro_sigma"; write to the parallel
# positive-emotion column, addressed by name.
results_K[5, "posemo_b_neuro_sigma"] <- extract_param(m_rvi_pa_w, "b_neuro_t")
SD
# Within-person SD of negative and positive emotion for each row of data_w,
# ignoring missing scores.
data_w$sd_NA <- NA
# seq_len() keeps the loops from running when data_w has no rows.
for (i in seq_len(nrow(data_w))) {
  data_w$sd_NA[i] <- sd(dataset$negemo_full_m[dataset$person_id == data_w$person_id[i]],
                        na.rm = TRUE)
}
data_w$sd_PA <- NA
for (i in seq_len(nrow(data_w))) {
  data_w$sd_PA[i] <- sd(dataset$posemo_full_m[dataset$person_id == data_w$person_id[i]],
                        na.rm = TRUE)
}
mean(data_w$sd_NA)
## [1] 0.5331675
mean(data_w$sd_PA)
## [1] 0.7259925
# An SD of exactly 0 would give log(0) = -Inf, so treat it as missing.
data_w$sd_PA[data_w$sd_PA == 0] <- NA
data_w$sd_NA[data_w$sd_NA == 0] <- NA
data_w$logsd_NA <- log(data_w$sd_NA)
data_w$logsd_PA <- log(data_w$sd_PA)
# Regress the log SD of negative emotion on neuro_t.
m_sd_na <- brm(logsd_NA ~ neuro_t, data= data_w,
               file = paste("models/", params$file, "Kn_model_logsd"))
## Warning: Rows containing NAs were excluded from the model.
m_sd_na
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: logsd_NA ~ neuro_t
## Data: data_w (Number of observations: 99)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.39 0.18 -1.73 -1.04 1.00 3792 2926
## neuro_t 0.22 0.05 0.12 0.32 1.00 3726 2969
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.38 0.03 0.33 0.43 1.00 3434 2832
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Store the neuro_t effect on the log SD of negative emotion in the "SD" row
# (third column of results_K, addressed here by its name).
results_K[6, "negemo_b_neuro_sigma"] <- extract_param(m_sd_na, "b_neuro_t")
# Regress the log SD of positive emotion on neuro_t.
m_sd_pa <- brm(
  logsd_PA ~ neuro_t,
  data = data_w,
  file = paste("models/", params$file, "Kp_model_logsd")
)
## Warning: Rows containing NAs were excluded from the model.
m_sd_pa
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: logsd_PA ~ neuro_t
## Data: data_w (Number of observations: 99)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.47 0.12 -0.72 -0.23 1.00 3878 2681
## neuro_t 0.04 0.04 -0.04 0.11 1.00 3867 2789
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.28 0.02 0.24 0.32 1.00 3819 3074
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Column 6 of results_K is "posemo_b_neuro", but the matching negative-emotion
# value is stored in "negemo_b_neuro_sigma"; write to the parallel
# positive-emotion column, addressed by name.
results_K[6, "posemo_b_neuro_sigma"] <- extract_param(m_sd_pa, "b_neuro_t")
# Export the complete results table, named after the input file.
library("writexl")
write_xlsx(results_K,paste0("", params$file, ".xlsx"))
Incremental Validity of SD
# Baseline location-scale model for negative emotion WITHOUT neuroticism:
# a person-level random intercept on the mean and on sigma, with the
# outcome treated as censored according to Acens ("left"/"right"/"none").
# Its person-specific estimates feed the incremental-validity regressions
# further down.
na_noneurot <- brm(
  bf(
    negemo_full_m | cens(Acens) ~ (1 | person_id),
    sigma ~ (1 | person_id)
  ),
  data = dataset,
  iter = 7000, warmup = 2000, chains = 4,
  control = list(adapt_delta = .99), init = 0.1,
  # Key the cache by dataset, like every other model in this report.
  # A fixed file name made all datasets share (and overwrite) one cache,
  # with only brms.file_refit = "on_change" preventing a stale fit from
  # being reused for the wrong data.
  file = paste("models/", params$file, "na_noneurot")
)
## Warning: Rows containing NAs were excluded from the model.
print(na_noneurot)
## Family: gaussian
## Links: mu = identity; sigma = log
## Formula: negemo_full_m | cens(Acens) ~ (1 | person_id)
## sigma ~ (1 | person_id)
## Data: dataset (Number of observations: 8702)
## Draws: 4 chains, each with iter = 7000; warmup = 2000; thin = 1;
## total post-warmup draws = 20000
##
## Group-Level Effects:
## ~person_id (Number of levels: 100)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.49 0.04 0.42 0.57 1.00 1606 3568
## sd(sigma_Intercept) 0.37 0.03 0.32 0.43 1.00 2181 4511
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1.52 0.05 1.42 1.62 1.01 455 1147
## sigma_Intercept -0.48 0.04 -0.56 -0.41 1.00 1288 2675
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Person-specific posterior summaries (Estimate, Est.Error, Q2.5, Q97.5)
# from the baseline model. "Intercept" is the person's mean level;
# "sigma_Intercept" is the person's residual SD on the LOG scale, because
# the model uses a log link for sigma.
rans <- coef(na_noneurot, summary = TRUE)
rans_i <- as.data.frame(rans$person_id[, , "Intercept"]) %>%
  tibble::rownames_to_column("person_id")
rans_s <- as.data.frame(rans$person_id[, , "sigma_Intercept"]) %>%
  tibble::rownames_to_column("person_id")
nrow(rans_s)
## [1] 100
nrow(rans_i)
## [1] 100
nrow(data_w)
## [1] 100
# Full outer joins keep every person; merge() suffixes the clashing summary
# columns with ".x" (sigma, from rans_s) and ".y" (mean, from rans_i).
dat <- merge(rans_s, rans_i, all = TRUE, by = "person_id")
dat <- merge(dat, data_w, all = TRUE, by = "person_id")
# Rename by column name rather than by position, so a change in column
# order or count fails loudly instead of silently relabelling the wrong
# variable.
stopifnot(all(c("Estimate.x", "Estimate.y") %in% names(dat)))
names(dat)[names(dat) == "Estimate.x"] <- "Est.SD"
names(dat)[names(dat) == "Estimate.y"] <- "Est.M"
# Full model: neuroticism predicted from person-level (log) SD and mean.
fit1 <- lm(neuro_t ~ Est.SD + Est.M, data = dat)
summary(fit1)
##
## Call:
## lm(formula = neuro_t ~ Est.SD + Est.M, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.20677 -0.54788 0.09355 0.52907 1.56921
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6170 0.2359 11.092 < 2e-16 ***
## Est.SD 0.4288 0.1902 2.254 0.02648 *
## Est.M 0.4969 0.1408 3.530 0.00064 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6603 on 96 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1449, Adjusted R-squared: 0.1271
## F-statistic: 8.137 on 2 and 96 DF, p-value: 0.0005441
# Reduced model: neuroticism predicted from the person-level mean only.
# It is the comparison baseline for the model that also includes Est.SD.
fit1.2 <- lm(formula = neuro_t ~ Est.M, data = dat)
print(summary(fit1.2))
##
## Call:
## lm(formula = neuro_t ~ Est.M, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.35119 -0.59910 0.09046 0.51579 1.75698
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.4489 0.2285 10.718 < 2e-16 ***
## Est.M 0.4691 0.1431 3.277 0.00145 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6741 on 97 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.0997, Adjusted R-squared: 0.09042
## F-statistic: 10.74 on 1 and 97 DF, p-value: 0.001454
# F test comparing the reduced model (mean only) with the full model
# (mean + SD), i.e. whether Est.SD adds explained variance.
# NOTE(review): the object name masks stats::aov in the global environment;
# it is kept because the results table below reads aov$`Pr(>F)`.
aov <- anova(fit1.2, fit1)
print(aov)
## Analysis of Variance Table
##
## Model 1: neuro_t ~ Est.M
## Model 2: neuro_t ~ Est.SD + Est.M
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 97 44.077
## 2 96 41.862 1 2.2151 5.0799 0.02648 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Incremental validity of the SD: gain in R^2 from adding Est.SD to the
# mean-only model. Computed once and reused for both the printed value and
# the exported table.
fit1_summary <- summary(fit1)
fit1_coefs <- coef(fit1_summary)
delta_r2 <- fit1_summary$r.squared - summary(fit1.2)$r.squared
delta_r2
## [1] 0.04524539
# One-row summary for this dataset. Coefficients are looked up by term and
# statistic name instead of by row/column position, so a dropped or
# reordered term raises an error rather than filling in the wrong numbers.
# check.names = FALSE keeps the non-syntactic column headers as written.
results_SDin <- data.frame(
  Dataset = params$file,
  b_SD = fit1_coefs["Est.SD", "Estimate"],
  Err.SD = fit1_coefs["Est.SD", "Std. Error"],
  `p(b_SD)` = fit1_coefs["Est.SD", "Pr(>|t|)"],
  b_M = fit1_coefs["Est.M", "Estimate"],
  Err.M = fit1_coefs["Est.M", "Std. Error"],
  `p(b_M)` = fit1_coefs["Est.M", "Pr(>|t|)"],
  `ΔR²` = delta_r2,
  # p-value of the F test for the model comparison (second row of the table)
  p = aov$`Pr(>F)`[2],
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Namespace-qualified call: no second library("writexl") is needed.
writexl::write_xlsx(results_SDin, paste0("SD", params$file, ".xlsx"))