set.seed(12345)
library(tidyverse)
<- 500
n <- tibble(
ceiling Moderator = c(rep(0, n/5*4), rep(1, n/5)),
pred = 1.4 * Moderator + 0.7 * rnorm(n),
out = 1.1 * Moderator + 0.6 * pred + 0.5 * rnorm(n)
%>%
) mutate(
Moderator = as.factor(Moderator)
)sd(ceiling$out)
## [1] 0.9513978
library(ggplot2)
theme_set(ggthemes::theme_clean())
<- ggplot(data = ceiling, aes(x = pred, y = out, color = Moderator)) +
p1 geom_point(alpha = .3) +
geom_smooth(method = "lm", formula = y~x) +
coord_cartesian(ylim = c(min(ceiling$out), max(ceiling$out))) +
xlab("Predictor") +
ylab("Outcome") +
scale_color_viridis_d(guide = F, begin = 0.3, end = 0.8) +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
p1
qplot(residuals(lm(out ~ pred*Moderator, ceiling)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qplot(ceiling$out)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
$cens <- ceiling$out
ceiling$cens[ceiling$cens > 2] <- 2
ceiling<- ggplot(data = ceiling, aes(x = pred, y = cens, color = Moderator)) +
p2 geom_point(alpha = .3) +
geom_smooth(method = "lm", formula = y~x) +
coord_cartesian(ylim = c(min(ceiling$out), max(ceiling$out))) +
xlab("Predictor") +
ylab("Outcome with ceiling") +
scale_color_viridis_d(begin = 0.3, end = 0.8) +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
p2
<- lm(cens ~ pred*Moderator, ceiling)
reg summary(reg)
##
## Call:
## lm(formula = cens ~ pred * Moderator, data = ceiling)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.42017 -0.29491 0.02019 0.30706 1.63562
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02885 0.02398 1.203 0.22956
## pred 0.60212 0.03365 17.894 < 2e-16 ***
## Moderator1 1.17447 0.11827 9.930 < 2e-16 ***
## pred:Moderator1 -0.25018 0.08423 -2.970 0.00312 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4766 on 496 degrees of freedom
## Multiple R-squared: 0.716, Adjusted R-squared: 0.7143
## F-statistic: 416.9 on 3 and 496 DF, p-value: < 2.2e-16
qplot(residuals(reg))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qplot(ceiling$cens)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
$ordinal <- ceiling$out
ceiling$ordinal[ceiling$ordinal > 2] <- 2
ceiling$ordinal[ceiling$ordinal < -2] <- -2
ceiling$ordinal <- round(ceiling$ordinal)
ceiling<- ggplot(data = ceiling, aes(x = pred, y = ordinal, color = Moderator)) +
p3 geom_point(alpha = .3) +
geom_smooth(method = "lm", formula = y~x) +
coord_cartesian(ylim = c(min(ceiling$out), max(ceiling$out))) +
scale_color_viridis_d(guide = F, begin = 0.3, end = 0.8) +
xlab("Predictor") +
ylab("Outcome measured on Likert scale") +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
p3
<- lm(ordinal ~ pred*Moderator, ceiling)
reg summary(reg)
##
## Call:
## lm(formula = ordinal ~ pred * Moderator, data = ceiling)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.64816 -0.33252 0.04059 0.33757 1.72114
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01497 0.02827 0.529 0.59675
## pred 0.62222 0.03966 15.688 < 2e-16 ***
## Moderator1 1.31889 0.13941 9.461 < 2e-16 ***
## pred:Moderator1 -0.32511 0.09929 -3.275 0.00113 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5618 on 496 degrees of freedom
## Multiple R-squared: 0.6612, Adjusted R-squared: 0.6592
## F-statistic: 322.7 on 3 and 496 DF, p-value: < 2.2e-16
qplot(residuals(reg))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qplot(ceiling$ordinal)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
library(cowplot)
plot_grid(p1, p2 + theme(legend.title = element_text(size = 9), legend.background = element_blank(), legend.position = c(0.5,0.85), legend.justification = "right"), p3, labels = c("A", "B", "C"), nrow = 1, align = "hv", axis = "lb")
ggsave("plots/ceiling_multiplot.png", width = 7, height = 3)
ggsave("plots/ceiling_multiplot.pdf", width = 7, height = 3)
library(brms)
# original data: no interaction
summary(brm(out ~ pred*Moderator, data = ceiling, cores = 4, backend = "cmdstanr", file = "models/ceiling/original_no_interaction"))
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: out ~ pred * Moderator
## Data: ceiling (Number of observations: 500)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.03 0.03 -0.02 0.08 1.00 4133 3052
## pred 0.61 0.04 0.54 0.68 1.00 3286 1839
## Moderator1 1.11 0.13 0.85 1.36 1.00 2826 2508
## pred:Moderator1 -0.11 0.09 -0.29 0.07 1.00 2741 2379
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.50 0.02 0.47 0.54 1.00 3706 2518
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# with ceiling effect: interaction
summary(uncensored <-brm(cens ~ pred*Moderator, data = ceiling, cores = 4, backend = "cmdstanr", file = "models/ceiling/ceiling_effect"))
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: cens ~ pred * Moderator
## Data: ceiling (Number of observations: 500)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.03 0.02 -0.02 0.08 1.00 3999 2960
## pred 0.60 0.03 0.53 0.67 1.00 3813 2612
## Moderator1 1.18 0.12 0.94 1.41 1.00 2967 2857
## pred:Moderator1 -0.25 0.08 -0.41 -0.09 1.00 2788 2793
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.48 0.02 0.45 0.51 1.00 3732 2176
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# with ordinal variable: interaction
summary(brm(ordinal ~ pred*Moderator, data = ceiling, cores = 4, backend = "cmdstanr", file = "models/ceiling/ordinal_as_gaussian"))
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: ordinal ~ pred * Moderator
## Data: ceiling (Number of observations: 500)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.01 0.03 -0.04 0.07 1.00 4424 3242
## pred 0.62 0.04 0.54 0.70 1.00 3678 2805
## Moderator1 1.32 0.14 1.05 1.59 1.00 2583 2655
## pred:Moderator1 -0.33 0.10 -0.52 -0.14 1.00 2664 2591
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.56 0.02 0.53 0.60 1.00 4162 2844
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
qplot(ceiling$ordinal)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qplot(ceiling$cens)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Now let’s set up a better model in brms. First we set a variable that tells us which values are censored and in what way
$censored <- "none"
ceiling$censored[ceiling$cens >= 2] <- "right"
ceilingtable(ceiling$censored)
##
## none right
## 463 37
Run model
<- brm(cens | cens(censored) ~ pred * Moderator, data = ceiling, cores = 4, backend = "cmdstanr", file = "models/ceiling/censored")
censored summary(censored)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: cens | cens(censored) ~ pred * Moderator
## Data: ceiling (Number of observations: 500)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.03 0.03 -0.02 0.08 1.00 3934 3039
## pred 0.60 0.04 0.53 0.67 1.00 3360 2572
## Moderator1 1.04 0.13 0.79 1.29 1.00 2357 2392
## pred:Moderator1 -0.04 0.10 -0.23 0.15 1.00 2377 2377
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.50 0.02 0.47 0.54 1.00 3815 2611
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
loo(uncensored, censored)
## Warning: Not all models have the same y variable. ('yhash' attributes do not
## match)
## Output of model 'uncensored':
##
## Computed from 4000 by 500 log-likelihood matrix
##
## Estimate SE
## elpd_loo -341.7 16.8
## p_loo 4.4 0.4
## looic 683.4 33.7
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'censored':
##
## Computed from 4000 by 500 log-likelihood matrix
##
## Estimate SE
## elpd_loo -364.2 15.7
## p_loo 4.6 0.4
## looic 728.4 31.4
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## Model comparisons:
## elpd_diff se_diff
## uncensored 0.0 0.0
## censored -22.5 4.2
Now we treat the ordinal variable as ordinal instead of Gaussian. We’re assuming a cumulative distribution with equidistant thresholds (because that’s what we simulated).
$ordinal <- factor(ceiling$ordinal, ordered = TRUE)
ceiling<- brm(ordinal ~ pred*Moderator, data = ceiling, family = cumulative(), cores = 4, backend = "cmdstanr", file = "models/ceiling/cumulative")
ordinal summary(ordinal)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: ordinal ~ pred * Moderator
## Data: ceiling (Number of observations: 500)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept[1] -5.80 0.61 -7.12 -4.74 1.00 2102 2225
## Intercept[2] -1.67 0.15 -1.97 -1.38 1.00 3670 3480
## Intercept[3] 1.58 0.15 1.30 1.87 1.00 3087 3165
## Intercept[4] 5.42 0.42 4.65 6.29 1.00 3004 2369
## pred 2.15 0.18 1.80 2.53 1.00 2690 2644
## Moderator1 4.39 0.68 3.11 5.77 1.00 2241 2629
## pred:Moderator1 -0.41 0.49 -1.37 0.60 1.00 2365 2775
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).