set.seed(12345)
<- 10000
n <- rbinom(n, size = 1, prob = .50)
inoffice <- rbinom(n, size = 1, prob = .10)
mentor
# Probability of quitting
<- - 2.9 + inoffice*2.7 - mentor*1.2 + inoffice*mentor*0.7
latent_quit
<- function(x) {
logit exp(x)/(1 + exp(x))
}
<- logit(latent_quit)
prob_quit <- rbinom(n, size = 1, prob = prob_quit)
quit
<- data.frame(inoffice, mentor, quit) office
library(brms)
<- brm(quit ~ mentor*inoffice,
whoquits data = office,
family = bernoulli,
chains = 4, file = "models/whoquits.rds")
summary(whoquits)
## Family: bernoulli
## Links: mu = logit
## Formula: quit ~ mentor * inoffice
## Data: office (Number of observations: 10000)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -3.06 0.07 -3.21 -2.91 1.00 1873 1680
## mentor -1.45 0.42 -2.33 -0.73 1.00 1327 1826
## inoffice 2.90 0.08 2.75 3.06 1.00 1993 1742
## mentor:inoffice 0.86 0.43 0.09 1.75 1.00 1435 1802
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
summary(glm(quit ~ mentor*inoffice,
data = office,
family = binomial))
##
## Call:
## glm(formula = quit ~ mentor * inoffice, family = binomial, data = office)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1100 -0.8799 -0.3033 -0.1537 2.9815
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.05589 0.07236 -42.234 < 2e-16 ***
## mentor -1.37691 0.41643 -3.306 0.000945 ***
## inoffice 2.89528 0.07823 37.012 < 2e-16 ***
## mentor:inoffice 0.78828 0.42865 1.839 0.065916 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 11151.2 on 9999 degrees of freedom
## Residual deviance: 8592.2 on 9996 degrees of freedom
## AIC: 8600.2
##
## Number of Fisher Scoring iterations: 6
<- logit(fixef(whoquits)["Intercept", 1])
not_inoffice_no_mentoring <- logit(fixef(whoquits)["Intercept", 1] + fixef(whoquits)["mentor", 1])
not_inoffice_mentoring
- not_inoffice_mentoring not_inoffice_no_mentoring
## [1] 0.03397337
<- logit(fixef(whoquits)["Intercept", 1] + fixef(whoquits)["inoffice", 1])
inoffice_no_mentoring <- logit(fixef(whoquits)["Intercept", 1] + fixef(whoquits)["inoffice", 1]
inoffice_mentoring + fixef(whoquits)["mentor", 1] + fixef(whoquits)["mentor:inoffice", 1])
- inoffice_mentoring inoffice_no_mentoring
## [1] 0.1395221
# Helper function
<- function(input, variable, value) {
fix_var # Function takes a dataframe, a variable name, and a specific value
# Returns the dataframe but with the given variable set to the
# specific value
<- value
input[, variable] return(input)
}
<- function(data_no_mentoring, data_mentoring, modelfit) {
predictions # Takes a list of two dataframes and a model
# Returns margins, average marginal effect, relative risk
library(brms)
<- predict(modelfit, newdata = data_no_mentoring, summary = FALSE)
pred1 <- predict(modelfit, newdata = data_mentoring, summary = FALSE)
pred2
# Margins
<- rowMeans(pred1)
averages1 <- rowMeans(pred2)
averages2
<- c(mean(averages1), quantile(averages1, probs = c(.50, .025, .975)))
nonmentors <- c(mean(averages2), quantile(averages2, probs = c(.50, .025, .975)))
mentors <- rbind(nonmentors, mentors)
margins
# Average Effect
<- pred2 - pred1
diff <- rowMeans(diff)
average_diff <- c(mean(average_diff), quantile(average_diff, probs = c(.50, .025, .975)))
ame
# Relative risk
<- c(mean(rowMeans(pred2)/rowMeans(pred1)),
relative quantile(rowMeans(pred2)/rowMeans(pred1),
probs = c(.5, .025, .975)))
# Return all results
<- list(margins, ame, relative)
all_results names(all_results) <- c("Predictive margins", "Average marginal effect", "Relative risk")
return(all_results)
}
<- function(data_no_mentoring_1, data_mentoring_1,
comparison
data_no_mentoring_2, data_mentoring_2, modelfit) {# Takes a list of four dataframes and a model
# Compare average marginal effects and relative risks
# Difference is group2 - group1
library(brms)
<- predict(modelfit, newdata = data_no_mentoring_1, summary = FALSE)
pred1 <- predict(modelfit, newdata = data_mentoring_1, summary = FALSE)
pred2 <- predict(modelfit, newdata = data_no_mentoring_2, summary = FALSE)
pred3 <- predict(modelfit, newdata = data_mentoring_2, summary = FALSE)
pred4
# Average Effect
<- pred2 - pred1 # mentor effect group 1
diff1 <- pred4 - pred3 # mentor effect group 2
diff2
# Difference in the effects
<- diff2 - diff1
diff3 <- rowMeans(diff3)
average_diff
# Summarize
<- c(mean(average_diff), quantile(average_diff, probs = c(.50, .025, .975)))
difference_prob
# Relative risk
<- rowMeans(pred2)/rowMeans(pred1)
relative1 <- rowMeans(pred4)/rowMeans(pred3)
relative2
<- relative2 - relative1
rel_diffs
<- c(mean(rel_diffs),
difference_rr quantile(rel_diffs,
probs = c(.5, .025, .975)))
# Return all results
<- list(difference_prob, difference_rr)
all_results names(all_results) <- c("Difference in AME", "Difference in RR")
return(all_results)
}
# Not inoffice
<- predictions(fix_var(fix_var(office, "mentor", 0), "inoffice", 0),
pred_no_inoffice fix_var(fix_var(office, "mentor", 1), "inoffice", 0),
whoquits)
# inoffice
<- predictions(fix_var(fix_var(office, "mentor", 0), "inoffice", 1),
pred_inoffice fix_var(fix_var(office, "mentor", 1), "inoffice", 1),
whoquits)
# Comparison
<- comparison(fix_var(fix_var(office, "mentor", 0), "inoffice", 0),
comp fix_var(fix_var(office, "mentor", 1), "inoffice", 0),
fix_var(fix_var(office, "mentor", 0), "inoffice", 1),
fix_var(fix_var(office, "mentor", 1), "inoffice", 1),
whoquits)
<- function(x) {
logit exp(x)/(1 + exp(x))
}
library(ggplot2)
# Intercept
<- summary(whoquits)$fixed[1, "Estimate"]
intercept
# mentor
<- summary(whoquits)$fixed[2, "Estimate"]
mentor
# inoffice
<- summary(whoquits)$fixed[3, "Estimate"]
inoffice
# Interaction
<- summary(whoquits)$fixed[4, "Estimate"]
interact
theme_set(theme_bw())
ggplot(data = data.frame(x = 0), mapping = aes(x = x)) +
xlim(-5, 5) +
stat_function(fun = logit) +
xlab("Logit") +
ylab("Probability of quitting") +
geom_point(aes(x = intercept,
y = logit(intercept)), color = "blue") +
geom_point(aes(x = intercept + mentor,
y = logit(intercept + mentor)), color = "blue") +
# Horizontal
geom_segment(aes(x = intercept,
xend = intercept + mentor,
y = 0,
yend = 0), color = "blue", size = 2) +
geom_segment(aes(x = intercept, xend = intercept,
y = logit(intercept), yend = 0), linetype = "dashed", color = "grey") +
geom_segment(aes(x = intercept + mentor, xend = intercept + mentor,
y = logit(intercept + mentor), yend = 0), linetype = "dashed", color = "grey") +
# Vertical
geom_segment(aes(x = -5, xend = -5, y = logit(intercept),
yend = logit(intercept + mentor)), color = "blue", size = 2) +
geom_segment(aes(x = -5, xend = intercept,
y = logit(intercept), yend = logit(intercept)), linetype = "dashed", color = "grey") +
geom_segment(aes(x = -5, xend = intercept + mentor,
y = logit(intercept + mentor),
yend = logit(intercept + mentor)), linetype = "dashed", color = "grey") +
geom_point(aes(x = intercept + inoffice,
y = logit(intercept + inoffice)), color = "red") +
geom_point(aes(x = intercept + mentor + inoffice + interact,
y = logit(intercept + mentor + inoffice + interact)), color = "red") +
# Horizontal
geom_segment(aes(x = intercept + inoffice,
xend = intercept + mentor + inoffice + interact, y = 0, yend = 0), color = "red", size = 2) +
geom_segment(aes(x = intercept + inoffice, xend = intercept + inoffice,
y = logit(intercept + inoffice), yend = 0), linetype = "dashed", color = "grey") +
geom_segment(aes(x = intercept + mentor + inoffice + interact,
xend = intercept + mentor + inoffice + interact,
y = logit(intercept + mentor + inoffice + interact), yend = 0),
linetype = "dashed", color = "grey") +
# Vertical
geom_segment(aes(x = -5, xend = -5, y = logit(intercept + inoffice),
yend = logit(intercept + mentor + inoffice + interact)), color = "red", size = 2) +
geom_segment(aes(x = -5, xend = intercept + inoffice,
y = logit(intercept + inoffice), yend = logit(intercept + inoffice)), linetype = "dashed", color = "grey") +
geom_segment(aes(x = -5, xend = intercept + mentor + inoffice + interact,
y = logit(intercept + mentor + inoffice + interact),
yend = logit(intercept + mentor + inoffice + interact)), linetype = "dashed", color = "grey") +
annotate("text", x = -4, y = 0.5, label = "On-site", color = "red") +
annotate("text", x = -4, y = 0.09, label = "Remote", color = "blue") +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
ggsave("plots/log_function.png", width = 4, height = 3)
ggsave("plots/log_function.pdf", width = 4, height = 3)