source("0__helpers.R")
opts_chunk$set(warning=TRUE, cache=F,cache.lazy=F,tidy=FALSE,autodep=TRUE,dev=c('png','pdf'),fig.width=20,fig.height=12.5,out.width='1440px',out.height='900px',cache.extra=file.info('swed1.rdata')[, 'mtime'])
make_path = function(file) {
get_coefficient_path(file, "swed")
}
# options for each chunk calling knit_child
opts_chunk$set(warning=FALSE, message = FALSE)
The swed_subset_children.1
dataset is based on the full dataset of all participants where paternal age is known and birth years are from 1947 to 1959. The subset contains 117726 randomly drawn participants from 80000 families.
All of the following models are the same as our main model m3, except for the noted changes to test robustness.
For the four historical populations, we imposed quite stringent exclusion criteria to ensure sufficient data quality for our intended analysis. This was not necessary for the modern Swedish data, because there were no exclusion criteria to relax.
model_filename = make_path("r1_relaxed_exclusion_criteria")
if (file.exists(model_filename)) {
cat(summarise_model())
r1 = model
}
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 1 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 117726)
## Samples: 6 chains, each with iter = 800; warmup = 300; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## sd ~ student_t(3, 0, 10)
##
## Group-Level Effects:
## ~idParents (Number of levels: 75475)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.02 438 1.01
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.72 0.02 0.69 0.75 3000
## paternalage -0.05 0.01 -0.08 -0.02 1375
## birth_cohort1950M1955 0.00 0.01 -0.02 0.01 3000
## birth_cohort1955M1960 0.00 0.01 -0.01 0.01 3000
## male1 -0.06 0.00 -0.07 -0.05 3000
## maternalage.factor1420 0.07 0.01 0.06 0.09 3000
## maternalage.factor3561 -0.01 0.01 -0.02 0.01 3000
## paternalage.mean 0.01 0.01 -0.02 0.04 1388
## paternal_loss01 0.19 0.28 -0.38 0.69 3000
## paternal_loss15 -0.06 0.08 -0.21 0.09 3000
## paternal_loss510 -0.06 0.03 -0.12 0.00 3000
## paternal_loss1015 -0.02 0.02 -0.06 0.02 3000
## paternal_loss1520 0.00 0.01 -0.03 0.03 3000
## paternal_loss2025 0.00 0.01 -0.02 0.02 3000
## paternal_loss2530 -0.02 0.01 -0.04 0.00 3000
## paternal_loss3035 0.00 0.01 -0.02 0.01 3000
## paternal_loss3540 0.00 0.01 -0.02 0.01 3000
## paternal_loss4045 -0.01 0.01 -0.03 0.00 3000
## paternal_lossunclear -0.06 0.01 -0.07 -0.04 3000
## maternal_loss01 -0.30 0.22 -0.76 0.10 3000
## maternal_loss15 -0.02 0.10 -0.22 0.18 3000
## maternal_loss510 0.00 0.05 -0.09 0.09 3000
## maternal_loss1015 -0.02 0.03 -0.07 0.04 3000
## maternal_loss1520 -0.01 0.02 -0.05 0.03 3000
## maternal_loss2025 0.03 0.02 -0.01 0.06 3000
## maternal_loss2530 0.00 0.01 -0.03 0.03 3000
## maternal_loss3035 0.00 0.01 -0.02 0.02 3000
## maternal_loss3540 0.00 0.01 -0.02 0.02 3000
## maternal_loss4045 -0.01 0.01 -0.03 0.01 3000
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 3000
## older_siblings1 0.02 0.01 0.00 0.03 1642
## older_siblings2 0.03 0.01 0.00 0.05 1404
## older_siblings3 0.04 0.02 0.01 0.08 1445
## older_siblings4 0.01 0.02 -0.04 0.06 1610
## older_siblings5P -0.04 0.03 -0.10 0.02 1600
## nr.siblings 0.04 0.00 0.03 0.04 1515
## last_born1 0.01 0.00 0.00 0.02 3000
## Rhat
## Intercept 1
## paternalage 1
## birth_cohort1950M1955 1
## birth_cohort1955M1960 1
## male1 1
## maternalage.factor1420 1
## maternalage.factor3561 1
## paternalage.mean 1
## paternal_loss01 1
## paternal_loss15 1
## paternal_loss510 1
## paternal_loss1015 1
## paternal_loss1520 1
## paternal_loss2025 1
## paternal_loss2530 1
## paternal_loss3035 1
## paternal_loss3540 1
## paternal_loss4045 1
## paternal_lossunclear 1
## maternal_loss01 1
## maternal_loss15 1
## maternal_loss510 1
## maternal_loss1015 1
## maternal_loss1520 1
## maternal_loss2025 1
## maternal_loss2530 1
## maternal_loss3035 1
## maternal_loss3540 1
## maternal_loss4045 1
## maternal_lossunclear 1
## older_siblings1 1
## older_siblings2 1
## older_siblings3 1
## older_siblings4 1
## older_siblings5P 1
## nr.siblings 1
## last_born1 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.061 | 1.999 | 2.123 |
paternalage | 0.9505 | 0.9233 | 0.9782 |
birth_cohort1950M1955 | 0.9955 | 0.9846 | 1.007 |
birth_cohort1955M1960 | 1.001 | 0.9893 | 1.013 |
male1 | 0.9419 | 0.9338 | 0.9496 |
maternalage.factor1420 | 1.074 | 1.057 | 1.091 |
maternalage.factor3561 | 0.9913 | 0.9757 | 1.007 |
paternalage.mean | 1.007 | 0.9787 | 1.037 |
paternal_loss01 | 1.209 | 0.6848 | 1.984 |
paternal_loss15 | 0.9425 | 0.8074 | 1.095 |
paternal_loss510 | 0.9399 | 0.8834 | 0.9992 |
paternal_loss1015 | 0.9804 | 0.9427 | 1.02 |
paternal_loss1520 | 1 | 0.9749 | 1.027 |
paternal_loss2025 | 0.9972 | 0.9757 | 1.019 |
paternal_loss2530 | 0.9841 | 0.965 | 1.003 |
paternal_loss3035 | 0.9963 | 0.98 | 1.013 |
paternal_loss3540 | 0.9961 | 0.9809 | 1.012 |
paternal_loss4045 | 0.9863 | 0.9708 | 1.002 |
paternal_lossunclear | 0.9453 | 0.9344 | 0.9565 |
maternal_loss01 | 0.7418 | 0.47 | 1.11 |
maternal_loss15 | 0.9842 | 0.8018 | 1.203 |
maternal_loss510 | 1.001 | 0.9108 | 1.096 |
maternal_loss1015 | 0.9814 | 0.9292 | 1.036 |
maternal_loss1520 | 0.9911 | 0.952 | 1.031 |
maternal_loss2025 | 1.026 | 0.9929 | 1.06 |
maternal_loss2530 | 1 | 0.9698 | 1.029 |
maternal_loss3035 | 0.9997 | 0.9769 | 1.023 |
maternal_loss3540 | 1.003 | 0.9838 | 1.023 |
maternal_loss4045 | 0.9914 | 0.9731 | 1.01 |
maternal_lossunclear | 0.9808 | 0.9707 | 0.991 |
older_siblings1 | 1.018 | 1.004 | 1.033 |
older_siblings2 | 1.028 | 1.004 | 1.054 |
older_siblings3 | 1.042 | 1.006 | 1.08 |
older_siblings4 | 1.01 | 0.9624 | 1.06 |
older_siblings5P | 0.9597 | 0.9038 | 1.02 |
nr.siblings | 1.038 | 1.032 | 1.044 |
last_born1 | 1.009 | 0.9999 | 1.019 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.93 | [1.89;1.96] | [1.91;1.95] |
estimate father 35y | 1.83 | [1.79;1.87] | [1.81;1.86] |
percentage change | -4.96 | [-7.67;-2.18] | [-6.73;-3.14] |
OR/IRR | 0.95 | [0.92;0.98] | [0.93;0.97] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r1_relaxed_exclusion_criteria.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
Adding covariates increases the complexity of the model and makes it harder to interpret. We chose to adjust for many potential confounds because we are interested in causal isolation of the paternal age effect. Here we show what happens when only birth cohort and average paternal age in the family are adjusted for.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 73 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + paternalage.mean + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 1500; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.02 0.01 0 0.04 99 1.06
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 0.72 0.01 0.70 0.75 3000 1
## paternalage -0.03 0.01 -0.04 -0.02 3000 1
## birth_cohort1950M1955 -0.01 0.01 -0.02 0.00 3000 1
## birth_cohort1955M1960 -0.02 0.01 -0.03 -0.01 3000 1
## paternalage.mean 0.00 0.01 -0.01 0.02 3000 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.064 | 2.014 | 2.115 |
paternalage | 0.9679 | 0.9563 | 0.98 |
birth_cohort1950M1955 | 0.9899 | 0.9796 | 1 |
birth_cohort1955M1960 | 0.9806 | 0.9707 | 0.9907 |
paternalage.mean | 1.001 | 0.9865 | 1.015 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.91 | [1.89;1.93] | [1.9;1.92] |
estimate father 35y | 1.85 | [1.83;1.87] | [1.84;1.86] |
percentage change | -3.2 | [-4.37;-2] | [-3.97;-2.43] |
OR/IRR | 0.97 | [0.96;0.98] | [0.96;0.98] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r2_few_controls.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
We chose to control for birth order/number of older siblings as a categorical variable, lumping all those who had more than 5 in the category 5+. Because a continuous covariate is also plausible, we tested this alternative model as well.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 1408177)
## Samples: 6 chains, each with iter = 1000; warmup = 500; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 884975)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0 0 0 0.01 206 1.02
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.75 0.00 0.74 0.76 3000
## paternalage -0.02 0.00 -0.03 -0.01 3000
## birth_cohort1950M1955 0.00 0.00 0.00 0.00 3000
## birth_cohort1955M1960 0.00 0.00 0.00 0.01 3000
## male1 -0.06 0.00 -0.07 -0.06 3000
## maternalage.factor1420 0.05 0.00 0.05 0.05 3000
## maternalage.factor3561 -0.01 0.00 -0.01 0.00 3000
## paternalage.mean -0.03 0.00 -0.03 -0.02 3000
## paternal_loss01 0.11 0.05 0.01 0.21 3000
## paternal_loss15 0.03 0.02 -0.01 0.07 3000
## paternal_loss510 -0.02 0.01 -0.04 0.00 3000
## paternal_loss1015 -0.01 0.01 -0.02 0.00 3000
## paternal_loss1520 0.00 0.00 0.00 0.01 3000
## paternal_loss2025 0.00 0.00 -0.01 0.00 3000
## paternal_loss2530 0.00 0.00 0.00 0.01 3000
## paternal_loss3035 0.00 0.00 -0.01 0.00 3000
## paternal_loss3540 0.00 0.00 -0.01 0.00 3000
## paternal_loss4045 -0.01 0.00 -0.01 0.00 3000
## paternal_lossunclear -0.06 0.00 -0.06 -0.05 3000
## maternal_loss01 -0.20 0.07 -0.34 -0.06 3000
## maternal_loss15 -0.06 0.03 -0.12 -0.01 3000
## maternal_loss510 -0.01 0.01 -0.04 0.02 3000
## maternal_loss1015 -0.01 0.01 -0.02 0.01 3000
## maternal_loss1520 -0.01 0.01 -0.02 0.01 3000
## maternal_loss2025 0.01 0.01 0.00 0.02 3000
## maternal_loss2530 0.00 0.00 0.00 0.01 3000
## maternal_loss3035 0.00 0.00 0.00 0.01 3000
## maternal_loss3540 -0.01 0.00 -0.01 0.00 3000
## maternal_loss4045 0.00 0.00 -0.01 0.00 3000
## maternal_lossunclear -0.02 0.00 -0.02 -0.02 3000
## older_siblings -0.01 0.00 -0.01 -0.01 3000
## nr.siblings 0.04 0.00 0.04 0.04 3000
## last_born1 0.01 0.00 0.01 0.01 3000
## Rhat
## Intercept 1
## paternalage 1
## birth_cohort1950M1955 1
## birth_cohort1955M1960 1
## male1 1
## maternalage.factor1420 1
## maternalage.factor3561 1
## paternalage.mean 1
## paternal_loss01 1
## paternal_loss15 1
## paternal_loss510 1
## paternal_loss1015 1
## paternal_loss1520 1
## paternal_loss2025 1
## paternal_loss2530 1
## paternal_loss3035 1
## paternal_loss3540 1
## paternal_loss4045 1
## paternal_lossunclear 1
## maternal_loss01 1
## maternal_loss15 1
## maternal_loss510 1
## maternal_loss1015 1
## maternal_loss1520 1
## maternal_loss2025 1
## maternal_loss2530 1
## maternal_loss3035 1
## maternal_loss3540 1
## maternal_loss4045 1
## maternal_lossunclear 1
## older_siblings 1
## nr.siblings 1
## last_born1 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.123 | 2.104 | 2.142 |
paternalage | 0.9792 | 0.9713 | 0.9871 |
birth_cohort1950M1955 | 1.001 | 0.9976 | 1.004 |
birth_cohort1955M1960 | 1.002 | 0.9985 | 1.005 |
male1 | 0.9384 | 0.936 | 0.9408 |
maternalage.factor1420 | 1.052 | 1.047 | 1.056 |
maternalage.factor3561 | 0.9935 | 0.9892 | 0.9977 |
paternalage.mean | 0.9748 | 0.9668 | 0.9828 |
paternal_loss01 | 1.12 | 1.01 | 1.237 |
paternal_loss15 | 1.029 | 0.9909 | 1.067 |
paternal_loss510 | 0.9811 | 0.9648 | 0.9981 |
paternal_loss1015 | 0.9946 | 0.9846 | 1.004 |
paternal_loss1520 | 1.004 | 0.9953 | 1.011 |
paternal_loss2025 | 0.9973 | 0.9908 | 1.004 |
paternal_loss2530 | 1.002 | 0.997 | 1.008 |
paternal_loss3035 | 0.9996 | 0.9947 | 1.004 |
paternal_loss3540 | 0.9957 | 0.9913 | 1 |
paternal_loss4045 | 0.9914 | 0.9868 | 0.996 |
paternal_lossunclear | 0.9451 | 0.9419 | 0.9484 |
maternal_loss01 | 0.8192 | 0.7137 | 0.9402 |
maternal_loss15 | 0.9424 | 0.8892 | 0.9945 |
maternal_loss510 | 0.9889 | 0.9628 | 1.015 |
maternal_loss1015 | 0.9933 | 0.9779 | 1.009 |
maternal_loss1520 | 0.9941 | 0.9826 | 1.006 |
maternal_loss2025 | 1.008 | 0.9978 | 1.018 |
maternal_loss2530 | 1.004 | 0.9962 | 1.013 |
maternal_loss3035 | 1.004 | 0.9974 | 1.012 |
maternal_loss3540 | 0.995 | 0.9889 | 1.001 |
maternal_loss4045 | 0.9986 | 0.9932 | 1.004 |
maternal_lossunclear | 0.9795 | 0.9765 | 0.9825 |
older_siblings | 0.9899 | 0.9871 | 0.9927 |
nr.siblings | 1.043 | 1.041 | 1.044 |
last_born1 | 1.011 | 1.009 | 1.014 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.9 | [1.89;1.92] | [1.89;1.91] |
estimate father 35y | 1.86 | [1.85;1.87] | [1.86;1.87] |
percentage change | -2.08 | [-2.87;-1.29] | [-2.61;-1.56] |
OR/IRR | 0.98 | [0.97;0.99] | [0.97;0.98] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r3_birth_order_continuous.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
Birth order is usually used as a proxy variable for parental investment, the assumption being that older siblings require parental attention. However, there are are reasons to doubt this, as fully-grown siblings probably do not compete for the same resources. To compute a clearer proxy variable of competing siblings, we computed and adjusted for the number of siblings who were alive and younger than five at the time of birth of the anchor child.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 1 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + nr.siblings + dependent_sibs_f5y + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 1000; warmup = 500; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.02 453 1.02
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.75 0.01 0.72 0.78 3000
## paternalage -0.04 0.01 -0.06 -0.03 3000
## birth_cohort1950M1955 0.00 0.01 -0.02 0.01 3000
## birth_cohort1955M1960 0.00 0.01 -0.01 0.01 3000
## male1 -0.07 0.00 -0.07 -0.06 3000
## maternalage.factor1420 0.06 0.01 0.05 0.08 3000
## maternalage.factor3561 -0.01 0.01 -0.03 0.00 3000
## paternalage.mean -0.01 0.01 -0.02 0.01 3000
## paternal_loss01 0.09 0.21 -0.36 0.48 3000
## paternal_loss15 -0.02 0.06 -0.14 0.10 3000
## paternal_loss510 -0.06 0.03 -0.11 0.00 3000
## paternal_loss1015 -0.02 0.02 -0.05 0.01 3000
## paternal_loss1520 0.00 0.01 -0.02 0.03 3000
## paternal_loss2025 0.00 0.01 -0.03 0.02 3000
## paternal_loss2530 -0.01 0.01 -0.03 0.00 3000
## paternal_loss3035 -0.01 0.01 -0.02 0.01 3000
## paternal_loss3540 -0.01 0.01 -0.02 0.01 3000
## paternal_loss4045 -0.01 0.01 -0.03 0.00 3000
## paternal_lossunclear -0.06 0.01 -0.07 -0.05 3000
## maternal_loss01 -0.27 0.20 -0.66 0.12 3000
## maternal_loss15 -0.07 0.09 -0.26 0.10 3000
## maternal_loss510 0.01 0.04 -0.08 0.09 3000
## maternal_loss1015 -0.02 0.03 -0.07 0.04 3000
## maternal_loss1520 -0.01 0.02 -0.05 0.03 3000
## maternal_loss2025 0.02 0.02 -0.01 0.06 3000
## maternal_loss2530 0.00 0.01 -0.03 0.03 3000
## maternal_loss3035 0.00 0.01 -0.02 0.02 3000
## maternal_loss3540 0.00 0.01 -0.02 0.02 3000
## maternal_loss4045 0.00 0.01 -0.02 0.01 3000
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 3000
## nr.siblings 0.04 0.00 0.04 0.05 3000
## dependent_sibs_f5y -0.01 0.00 -0.01 0.00 3000
## Rhat
## Intercept 1
## paternalage 1
## birth_cohort1950M1955 1
## birth_cohort1955M1960 1
## male1 1
## maternalage.factor1420 1
## maternalage.factor3561 1
## paternalage.mean 1
## paternal_loss01 1
## paternal_loss15 1
## paternal_loss510 1
## paternal_loss1015 1
## paternal_loss1520 1
## paternal_loss2025 1
## paternal_loss2530 1
## paternal_loss3035 1
## paternal_loss3540 1
## paternal_loss4045 1
## paternal_lossunclear 1
## maternal_loss01 1
## maternal_loss15 1
## maternal_loss510 1
## maternal_loss1015 1
## maternal_loss1520 1
## maternal_loss2025 1
## maternal_loss2530 1
## maternal_loss3035 1
## maternal_loss3540 1
## maternal_loss4045 1
## maternal_lossunclear 1
## nr.siblings 1
## dependent_sibs_f5y 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.115 | 2.054 | 2.176 |
paternalage | 0.9603 | 0.9463 | 0.9744 |
birth_cohort1950M1955 | 0.9951 | 0.985 | 1.006 |
birth_cohort1955M1960 | 0.9981 | 0.9868 | 1.009 |
male1 | 0.9365 | 0.9289 | 0.9439 |
maternalage.factor1420 | 1.062 | 1.047 | 1.079 |
maternalage.factor3561 | 0.9852 | 0.9708 | 1 |
paternalage.mean | 0.9943 | 0.9796 | 1.01 |
paternal_loss01 | 1.092 | 0.6978 | 1.61 |
paternal_loss15 | 0.9813 | 0.8724 | 1.103 |
paternal_loss510 | 0.9465 | 0.8918 | 1.003 |
paternal_loss1015 | 0.9812 | 0.948 | 1.015 |
paternal_loss1520 | 1.003 | 0.9758 | 1.029 |
paternal_loss2025 | 0.995 | 0.974 | 1.016 |
paternal_loss2530 | 0.9861 | 0.9678 | 1.005 |
paternal_loss3035 | 0.9944 | 0.9778 | 1.011 |
paternal_loss3540 | 0.9943 | 0.9798 | 1.009 |
paternal_loss4045 | 0.9861 | 0.9721 | 1.001 |
paternal_lossunclear | 0.9448 | 0.9345 | 0.9556 |
maternal_loss01 | 0.7647 | 0.5154 | 1.124 |
maternal_loss15 | 0.9293 | 0.7708 | 1.11 |
maternal_loss510 | 1.007 | 0.922 | 1.097 |
maternal_loss1015 | 0.9823 | 0.9321 | 1.037 |
maternal_loss1520 | 0.9903 | 0.9522 | 1.028 |
maternal_loss2025 | 1.024 | 0.9911 | 1.058 |
maternal_loss2530 | 0.9988 | 0.9722 | 1.027 |
maternal_loss3035 | 0.9997 | 0.9765 | 1.024 |
maternal_loss3540 | 1.003 | 0.9832 | 1.023 |
maternal_loss4045 | 0.9953 | 0.9782 | 1.013 |
maternal_lossunclear | 0.9815 | 0.9721 | 0.9916 |
nr.siblings | 1.043 | 1.039 | 1.047 |
dependent_sibs_f5y | 0.9927 | 0.9865 | 0.9989 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.94 | [1.92;1.97] | [1.93;1.96] |
estimate father 35y | 1.87 | [1.84;1.89] | [1.85;1.88] |
percentage change | -3.97 | [-5.37;-2.56] | [-4.89;-3.06] |
OR/IRR | 0.96 | [0.95;0.97] | [0.95;0.97] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r4_control_dependent_sibs.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
Plausibly, being first-born has a different effect, when one is an only child as opposed to having two siblings, etc. Here, we allow for such an interaction effect.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 3 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings * nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 800; warmup = 300; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.02 438 1
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI
## Intercept 0.72 0.02 0.69 0.75
## paternalage -0.05 0.01 -0.07 -0.02
## birth_cohort1950M1955 0.00 0.01 -0.02 0.01
## birth_cohort1955M1960 0.00 0.01 -0.01 0.01
## male1 -0.07 0.00 -0.07 -0.06
## maternalage.factor1420 0.07 0.01 0.05 0.08
## maternalage.factor3561 -0.01 0.01 -0.02 0.01
## paternalage.mean 0.00 0.01 -0.03 0.03
## paternal_loss01 0.09 0.22 -0.36 0.51
## paternal_loss15 -0.02 0.06 -0.14 0.10
## paternal_loss510 -0.05 0.03 -0.11 0.00
## paternal_loss1015 -0.02 0.02 -0.05 0.02
## paternal_loss1520 0.00 0.01 -0.02 0.03
## paternal_loss2025 0.00 0.01 -0.03 0.02
## paternal_loss2530 -0.01 0.01 -0.03 0.01
## paternal_loss3035 -0.01 0.01 -0.02 0.01
## paternal_loss3540 -0.01 0.01 -0.02 0.01
## paternal_loss4045 -0.01 0.01 -0.03 0.00
## paternal_lossunclear -0.06 0.01 -0.07 -0.05
## maternal_loss01 -0.26 0.21 -0.68 0.12
## maternal_loss15 -0.06 0.09 -0.25 0.12
## maternal_loss510 0.01 0.04 -0.07 0.10
## maternal_loss1015 -0.01 0.03 -0.07 0.04
## maternal_loss1520 -0.01 0.02 -0.05 0.03
## maternal_loss2025 0.03 0.02 -0.01 0.06
## maternal_loss2530 0.00 0.01 -0.03 0.03
## maternal_loss3035 0.00 0.01 -0.02 0.02
## maternal_loss3540 0.01 0.01 -0.01 0.02
## maternal_loss4045 0.00 0.01 -0.02 0.01
## maternal_lossunclear -0.02 0.01 -0.03 -0.01
## older_siblings1 0.03 0.01 0.01 0.05
## older_siblings2 0.07 0.02 0.03 0.10
## older_siblings3 0.06 0.03 0.00 0.12
## older_siblings4 0.08 0.06 -0.03 0.19
## older_siblings5P 0.11 0.06 0.00 0.22
## nr.siblings 0.05 0.00 0.04 0.05
## last_born1 0.01 0.00 0.00 0.02
## older_siblings1:nr.siblings -0.01 0.00 -0.02 0.00
## older_siblings2:nr.siblings -0.02 0.01 -0.03 -0.01
## older_siblings3:nr.siblings -0.01 0.01 -0.03 0.00
## older_siblings4:nr.siblings -0.02 0.01 -0.04 0.00
## older_siblings5P:nr.siblings -0.03 0.01 -0.05 -0.01
## Eff.Sample Rhat
## Intercept 3000 1
## paternalage 3000 1
## birth_cohort1950M1955 3000 1
## birth_cohort1955M1960 3000 1
## male1 2879 1
## maternalage.factor1420 3000 1
## maternalage.factor3561 3000 1
## paternalage.mean 3000 1
## paternal_loss01 3000 1
## paternal_loss15 3000 1
## paternal_loss510 3000 1
## paternal_loss1015 3000 1
## paternal_loss1520 3000 1
## paternal_loss2025 3000 1
## paternal_loss2530 3000 1
## paternal_loss3035 3000 1
## paternal_loss3540 3000 1
## paternal_loss4045 3000 1
## paternal_lossunclear 3000 1
## maternal_loss01 3000 1
## maternal_loss15 3000 1
## maternal_loss510 3000 1
## maternal_loss1015 3000 1
## maternal_loss1520 3000 1
## maternal_loss2025 3000 1
## maternal_loss2530 3000 1
## maternal_loss3035 3000 1
## maternal_loss3540 3000 1
## maternal_loss4045 3000 1
## maternal_lossunclear 3000 1
## older_siblings1 3000 1
## older_siblings2 3000 1
## older_siblings3 3000 1
## older_siblings4 3000 1
## older_siblings5P 3000 1
## nr.siblings 3000 1
## last_born1 3000 1
## older_siblings1:nr.siblings 3000 1
## older_siblings2:nr.siblings 3000 1
## older_siblings3:nr.siblings 3000 1
## older_siblings4:nr.siblings 3000 1
## older_siblings5P:nr.siblings 3000 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.056 | 1.994 | 2.117 |
paternalage | 0.9548 | 0.9295 | 0.9815 |
birth_cohort1950M1955 | 0.9952 | 0.9848 | 1.006 |
birth_cohort1955M1960 | 0.9972 | 0.9862 | 1.008 |
male1 | 0.9365 | 0.9289 | 0.9442 |
maternalage.factor1420 | 1.068 | 1.051 | 1.084 |
maternalage.factor3561 | 0.9917 | 0.9767 | 1.006 |
paternalage.mean | 1 | 0.9736 | 1.029 |
paternal_loss01 | 1.096 | 0.699 | 1.667 |
paternal_loss15 | 0.9833 | 0.8713 | 1.106 |
paternal_loss510 | 0.9488 | 0.8964 | 1.005 |
paternal_loss1015 | 0.9824 | 0.9467 | 1.019 |
paternal_loss1520 | 1.004 | 0.9789 | 1.031 |
paternal_loss2025 | 0.996 | 0.9749 | 1.018 |
paternal_loss2530 | 0.9874 | 0.9691 | 1.006 |
paternal_loss3035 | 0.9949 | 0.9779 | 1.011 |
paternal_loss3540 | 0.9945 | 0.9803 | 1.01 |
paternal_loss4045 | 0.9864 | 0.9717 | 1.001 |
paternal_lossunclear | 0.9456 | 0.9351 | 0.9559 |
maternal_loss01 | 0.7733 | 0.5056 | 1.125 |
maternal_loss15 | 0.9403 | 0.7772 | 1.124 |
maternal_loss510 | 1.013 | 0.9314 | 1.1 |
maternal_loss1015 | 0.9852 | 0.9349 | 1.039 |
maternal_loss1520 | 0.9931 | 0.9553 | 1.032 |
maternal_loss2025 | 1.026 | 0.9917 | 1.062 |
maternal_loss2530 | 1.001 | 0.9721 | 1.03 |
maternal_loss3035 | 1.001 | 0.9789 | 1.024 |
maternal_loss3540 | 1.005 | 0.9859 | 1.024 |
maternal_loss4045 | 0.9963 | 0.9775 | 1.015 |
maternal_lossunclear | 0.9815 | 0.9714 | 0.9918 |
older_siblings1 | 1.03 | 1.011 | 1.051 |
older_siblings2 | 1.068 | 1.029 | 1.108 |
older_siblings3 | 1.062 | 0.9988 | 1.133 |
older_siblings4 | 1.084 | 0.9688 | 1.212 |
older_siblings5P | 1.117 | 0.9954 | 1.249 |
nr.siblings | 1.049 | 1.042 | 1.056 |
last_born1 | 1.013 | 1.004 | 1.022 |
older_siblings1:nr.siblings | 0.9892 | 0.9811 | 0.9975 |
older_siblings2:nr.siblings | 0.9794 | 0.9685 | 0.9903 |
older_siblings3:nr.siblings | 0.9869 | 0.9719 | 1.002 |
older_siblings4:nr.siblings | 0.9787 | 0.957 | 1.001 |
older_siblings5P:nr.siblings | 0.9691 | 0.953 | 0.9852 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.92 | [1.89;1.96] | [1.9;1.95] |
estimate father 35y | 1.84 | [1.8;1.87] | [1.81;1.86] |
percentage change | -4.52 | [-7.05;-1.85] | [-6.24;-2.76] |
OR/IRR | 0.95 | [0.93;0.98] | [0.94;0.97] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r5_birth_order_interact_siblings.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
Paternal age and birth order are highly collinear with each other and with maternal age. Therefore, the choice to include this predictor widens standard errors for each predictor and may be disputed. Here we show what happens when we simply omit the birth order control.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 20 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + nr.siblings + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 800; warmup = 300; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.03 283 1.04
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.75 0.01 0.72 0.77 3000
## paternalage -0.04 0.01 -0.05 -0.02 3000
## birth_cohort1950M1955 0.00 0.01 -0.02 0.01 3000
## birth_cohort1955M1960 0.00 0.01 -0.01 0.01 3000
## male1 -0.07 0.00 -0.07 -0.06 2587
## maternalage.factor1420 0.06 0.01 0.05 0.08 3000
## maternalage.factor3561 -0.01 0.01 -0.03 0.00 3000
## paternalage.mean -0.01 0.01 -0.02 0.01 3000
## paternal_loss01 0.08 0.23 -0.38 0.49 3000
## paternal_loss15 -0.02 0.06 -0.13 0.09 3000
## paternal_loss510 -0.06 0.03 -0.11 0.00 3000
## paternal_loss1015 -0.02 0.02 -0.06 0.02 3000
## paternal_loss1520 0.00 0.01 -0.02 0.03 3000
## paternal_loss2025 -0.01 0.01 -0.03 0.02 3000
## paternal_loss2530 -0.01 0.01 -0.03 0.00 3000
## paternal_loss3035 -0.01 0.01 -0.02 0.01 3000
## paternal_loss3540 -0.01 0.01 -0.02 0.01 3000
## paternal_loss4045 -0.01 0.01 -0.03 0.00 3000
## paternal_lossunclear -0.06 0.01 -0.07 -0.05 3000
## maternal_loss01 -0.27 0.21 -0.70 0.12 3000
## maternal_loss15 -0.07 0.09 -0.26 0.10 3000
## maternal_loss510 0.01 0.04 -0.08 0.09 3000
## maternal_loss1015 -0.02 0.03 -0.07 0.04 3000
## maternal_loss1520 -0.01 0.02 -0.05 0.03 3000
## maternal_loss2025 0.02 0.02 -0.01 0.06 3000
## maternal_loss2530 0.00 0.01 -0.03 0.03 3000
## maternal_loss3035 0.00 0.01 -0.02 0.02 3000
## maternal_loss3540 0.00 0.01 -0.02 0.02 3000
## maternal_loss4045 0.00 0.01 -0.02 0.01 3000
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 3000
## nr.siblings 0.04 0.00 0.04 0.04 3000
## Rhat
## Intercept 1
## paternalage 1
## birth_cohort1950M1955 1
## birth_cohort1955M1960 1
## male1 1
## maternalage.factor1420 1
## maternalage.factor3561 1
## paternalage.mean 1
## paternal_loss01 1
## paternal_loss15 1
## paternal_loss510 1
## paternal_loss1015 1
## paternal_loss1520 1
## paternal_loss2025 1
## paternal_loss2530 1
## paternal_loss3035 1
## paternal_loss3540 1
## paternal_loss4045 1
## paternal_lossunclear 1
## maternal_loss01 1
## maternal_loss15 1
## maternal_loss510 1
## maternal_loss1015 1
## maternal_loss1520 1
## maternal_loss2025 1
## maternal_loss2530 1
## maternal_loss3035 1
## maternal_loss3540 1
## maternal_loss4045 1
## maternal_lossunclear 1
## nr.siblings 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.107 | 2.048 | 2.167 |
paternalage | 0.9629 | 0.9487 | 0.9771 |
birth_cohort1950M1955 | 0.9954 | 0.9849 | 1.006 |
birth_cohort1955M1960 | 0.9983 | 0.9866 | 1.01 |
male1 | 0.9365 | 0.9288 | 0.9444 |
maternalage.factor1420 | 1.064 | 1.048 | 1.08 |
maternalage.factor3561 | 0.9876 | 0.9731 | 1.002 |
paternalage.mean | 0.9922 | 0.9775 | 1.008 |
paternal_loss01 | 1.087 | 0.6805 | 1.633 |
paternal_loss15 | 0.9797 | 0.8764 | 1.094 |
paternal_loss510 | 0.9464 | 0.8963 | 0.9994 |
paternal_loss1015 | 0.9808 | 0.9462 | 1.017 |
paternal_loss1520 | 1.003 | 0.9772 | 1.03 |
paternal_loss2025 | 0.9947 | 0.9747 | 1.016 |
paternal_loss2530 | 0.9862 | 0.9675 | 1.004 |
paternal_loss3035 | 0.9945 | 0.9783 | 1.011 |
paternal_loss3540 | 0.9943 | 0.9797 | 1.009 |
paternal_loss4045 | 0.9861 | 0.9715 | 1.001 |
paternal_lossunclear | 0.9449 | 0.9347 | 0.9554 |
maternal_loss01 | 0.7639 | 0.4986 | 1.127 |
maternal_loss15 | 0.9291 | 0.7718 | 1.108 |
maternal_loss510 | 1.007 | 0.9253 | 1.096 |
maternal_loss1015 | 0.9829 | 0.9327 | 1.036 |
maternal_loss1520 | 0.9906 | 0.9506 | 1.03 |
maternal_loss2025 | 1.024 | 0.9918 | 1.057 |
maternal_loss2530 | 0.9992 | 0.9716 | 1.027 |
maternal_loss3035 | 0.9996 | 0.9766 | 1.024 |
maternal_loss3540 | 1.003 | 0.9842 | 1.023 |
maternal_loss4045 | 0.9954 | 0.9765 | 1.013 |
maternal_lossunclear | 0.9814 | 0.9714 | 0.9915 |
nr.siblings | 1.04 | 1.037 | 1.042 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.94 | [1.92;1.97] | [1.93;1.96] |
estimate father 35y | 1.87 | [1.85;1.9] | [1.86;1.89] |
percentage change | -3.7 | [-5.13;-2.29] | [-4.64;-2.79] |
OR/IRR | 0.96 | [0.95;0.98] | [0.95;0.97] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r6_no_birth_order_control.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
We adjusted for parental loss very stringently, including covariates for parental loss up to age 45. Here we show what happens, when we only control for parental loss in the first, and the first five years of life.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 1000; warmup = 500; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.03 186 1.02
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.65 0.11 0.43 0.86 2607
## paternalage -0.05 0.01 -0.07 -0.02 1326
## birth_cohort1950M1955 -0.01 0.01 -0.02 0.01 3000
## birth_cohort1955M1960 0.00 0.01 -0.01 0.01 3000
## male1 -0.07 0.00 -0.07 -0.06 3000
## maternalage.factor1420 0.07 0.01 0.05 0.08 3000
## maternalage.factor3561 -0.01 0.01 -0.02 0.01 3000
## paternalage.mean 0.00 0.01 -0.02 0.03 1301
## paternal_loss01 0.10 0.22 -0.35 0.52 3000
## paternal_losslater 0.01 0.06 -0.10 0.13 3000
## paternal_lossunclear -0.04 0.06 -0.15 0.08 3000
## maternal_loss01 -0.20 0.23 -0.64 0.23 3000
## maternal_losslater 0.07 0.09 -0.11 0.26 2591
## maternal_lossunclear 0.05 0.09 -0.13 0.24 2614
## older_siblings1 0.02 0.01 0.00 0.03 1565
## older_siblings2 0.02 0.01 0.00 0.05 1250
## older_siblings3 0.03 0.02 0.00 0.07 1368
## older_siblings4 0.01 0.02 -0.03 0.05 1543
## older_siblings5P -0.06 0.03 -0.11 0.00 1475
## nr.siblings 0.04 0.00 0.03 0.04 1397
## last_born1 0.01 0.00 0.00 0.02 3000
## Rhat
## Intercept 1.00
## paternalage 1.00
## birth_cohort1950M1955 1.00
## birth_cohort1955M1960 1.00
## male1 1.00
## maternalage.factor1420 1.00
## maternalage.factor3561 1.00
## paternalage.mean 1.00
## paternal_loss01 1.00
## paternal_losslater 1.00
## paternal_lossunclear 1.00
## maternal_loss01 1.00
## maternal_losslater 1.00
## maternal_lossunclear 1.00
## older_siblings1 1.00
## older_siblings2 1.00
## older_siblings3 1.00
## older_siblings4 1.00
## older_siblings5P 1.00
## nr.siblings 1.01
## last_born1 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 1.919 | 1.536 | 2.363 |
paternalage | 0.9521 | 0.9279 | 0.9781 |
birth_cohort1950M1955 | 0.9948 | 0.9846 | 1.005 |
birth_cohort1955M1960 | 0.9966 | 0.986 | 1.007 |
male1 | 0.9365 | 0.9289 | 0.9441 |
maternalage.factor1420 | 1.07 | 1.054 | 1.087 |
maternalage.factor3561 | 0.9932 | 0.9787 | 1.008 |
paternalage.mean | 1.002 | 0.9762 | 1.029 |
paternal_loss01 | 1.11 | 0.702 | 1.685 |
paternal_losslater | 1.011 | 0.9064 | 1.136 |
paternal_lossunclear | 0.9606 | 0.8601 | 1.079 |
maternal_loss01 | 0.8219 | 0.5254 | 1.265 |
maternal_losslater | 1.069 | 0.8986 | 1.297 |
maternal_lossunclear | 1.049 | 0.8801 | 1.272 |
older_siblings1 | 1.017 | 1.003 | 1.03 |
older_siblings2 | 1.024 | 1.001 | 1.046 |
older_siblings3 | 1.034 | 1.001 | 1.068 |
older_siblings4 | 1.008 | 0.9661 | 1.053 |
older_siblings5P | 0.9439 | 0.8915 | 0.9985 |
nr.siblings | 1.04 | 1.035 | 1.045 |
last_born1 | 1.011 | 1.002 | 1.02 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.92 | [1.89;1.95] | [1.9;1.94] |
estimate father 35y | 1.83 | [1.8;1.87] | [1.81;1.85] |
percentage change | -4.84 | [-7.21;-2.19] | [-6.39;-3.09] |
OR/IRR | 0.95 | [0.93;0.98] | [0.94;0.97] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r7_less_parental_loss_control.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
Inheritance is linked to birth order and being male in several of the historical populations. Here, we adjust for the anchor being the first or last born adult son in a family. This implies that we control for our outcome to a certain extent, as “adult sons” cannot have died before adulthood, but a paternal age effect on mortality could still be detected for siblings other than the first- and last-born adults.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth.cohort + first_born_adult_male + last_born_adult_male + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 800; warmup = 300; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.02 432 1.01
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.74 0.01 0.71 0.77 3000
## paternalage -0.05 0.01 -0.07 -0.02 1439
## birth.cohort1950M1955 -0.01 0.01 -0.02 0.00 3000
## birth.cohort1955M1960 -0.01 0.01 -0.02 0.00 3000
## first_born_adult_male -0.18 0.06 -0.29 -0.07 3000
## last_born_adult_male -0.17 0.06 -0.28 -0.05 3000
## male1 -0.05 0.00 -0.06 -0.04 3000
## maternalage.factor1420 0.07 0.01 0.06 0.09 3000
## maternalage.factor3561 -0.01 0.01 -0.02 0.01 3000
## paternalage.mean 0.00 0.01 -0.02 0.03 1472
## paternal_loss01 0.08 0.21 -0.34 0.47 3000
## paternal_loss15 -0.02 0.06 -0.14 0.10 3000
## paternal_loss510 -0.05 0.03 -0.11 0.01 3000
## paternal_loss1015 -0.02 0.02 -0.05 0.02 3000
## paternal_loss1520 0.01 0.01 -0.02 0.03 3000
## paternal_loss2025 0.00 0.01 -0.02 0.02 3000
## paternal_loss2530 -0.01 0.01 -0.03 0.01 3000
## paternal_loss3035 0.00 0.01 -0.02 0.01 3000
## paternal_loss3540 -0.01 0.01 -0.02 0.01 3000
## paternal_loss4045 -0.01 0.01 -0.03 0.00 3000
## paternal_lossunclear -0.06 0.01 -0.07 -0.05 3000
## maternal_loss01 -0.23 0.21 -0.65 0.15 3000
## maternal_loss15 -0.05 0.09 -0.24 0.12 3000
## maternal_loss510 0.01 0.04 -0.07 0.09 3000
## maternal_loss1015 -0.02 0.03 -0.07 0.04 3000
## maternal_loss1520 -0.01 0.02 -0.05 0.03 3000
## maternal_loss2025 0.02 0.02 -0.01 0.06 3000
## maternal_loss2530 0.00 0.01 -0.02 0.03 3000
## maternal_loss3035 0.00 0.01 -0.02 0.02 3000
## maternal_loss3540 0.01 0.01 -0.01 0.02 3000
## maternal_loss4045 0.00 0.01 -0.02 0.01 3000
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 3000
## older_siblings1 0.02 0.01 0.00 0.03 1944
## older_siblings2 0.02 0.01 0.00 0.05 1480
## older_siblings3 0.03 0.02 0.00 0.07 1383
## older_siblings4 0.01 0.02 -0.03 0.05 1591
## older_siblings5P -0.06 0.03 -0.11 0.00 1779
## nr.siblings 0.04 0.00 0.03 0.04 1665
## last_born1 0.01 0.00 0.00 0.02 3000
## Rhat
## Intercept 1
## paternalage 1
## birth.cohort1950M1955 1
## birth.cohort1955M1960 1
## first_born_adult_male 1
## last_born_adult_male 1
## male1 1
## maternalage.factor1420 1
## maternalage.factor3561 1
## paternalage.mean 1
## paternal_loss01 1
## paternal_loss15 1
## paternal_loss510 1
## paternal_loss1015 1
## paternal_loss1520 1
## paternal_loss2025 1
## paternal_loss2530 1
## paternal_loss3035 1
## paternal_loss3540 1
## paternal_loss4045 1
## paternal_lossunclear 1
## maternal_loss01 1
## maternal_loss15 1
## maternal_loss510 1
## maternal_loss1015 1
## maternal_loss1520 1
## maternal_loss2025 1
## maternal_loss2530 1
## maternal_loss3035 1
## maternal_loss3540 1
## maternal_loss4045 1
## maternal_lossunclear 1
## older_siblings1 1
## older_siblings2 1
## older_siblings3 1
## older_siblings4 1
## older_siblings5P 1
## nr.siblings 1
## last_born1 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.094 | 2.034 | 2.153 |
paternalage | 0.9519 | 0.928 | 0.9758 |
birth.cohort1950M1955 | 0.9923 | 0.9819 | 1.003 |
birth.cohort1955M1960 | 0.9925 | 0.9812 | 1.004 |
first_born_adult_male | 0.834 | 0.7457 | 0.9284 |
last_born_adult_male | 0.8453 | 0.7568 | 0.947 |
male1 | 0.954 | 0.9465 | 0.9615 |
maternalage.factor1420 | 1.073 | 1.057 | 1.09 |
maternalage.factor3561 | 0.9935 | 0.9791 | 1.008 |
paternalage.mean | 1.002 | 0.9767 | 1.029 |
paternal_loss01 | 1.084 | 0.7115 | 1.605 |
paternal_loss15 | 0.9833 | 0.8704 | 1.105 |
paternal_loss510 | 0.9499 | 0.8977 | 1.006 |
paternal_loss1015 | 0.9819 | 0.9479 | 1.016 |
paternal_loss1520 | 1.006 | 0.9788 | 1.034 |
paternal_loss2025 | 0.9972 | 0.9766 | 1.018 |
paternal_loss2530 | 0.9879 | 0.9696 | 1.007 |
paternal_loss3035 | 0.9952 | 0.9788 | 1.011 |
paternal_loss3540 | 0.9949 | 0.9799 | 1.01 |
paternal_loss4045 | 0.9877 | 0.9731 | 1.002 |
paternal_lossunclear | 0.9444 | 0.9337 | 0.9549 |
maternal_loss01 | 0.7915 | 0.5222 | 1.158 |
maternal_loss15 | 0.9481 | 0.7891 | 1.13 |
maternal_loss510 | 1.013 | 0.9336 | 1.099 |
maternal_loss1015 | 0.985 | 0.933 | 1.039 |
maternal_loss1520 | 0.9946 | 0.956 | 1.033 |
maternal_loss2025 | 1.024 | 0.9913 | 1.059 |
maternal_loss2530 | 1.004 | 0.9768 | 1.031 |
maternal_loss3035 | 1.002 | 0.98 | 1.025 |
maternal_loss3540 | 1.005 | 0.9863 | 1.025 |
maternal_loss4045 | 0.9971 | 0.9791 | 1.015 |
maternal_lossunclear | 0.9797 | 0.9696 | 0.9896 |
older_siblings1 | 1.017 | 1.004 | 1.031 |
older_siblings2 | 1.025 | 1.003 | 1.048 |
older_siblings3 | 1.035 | 1.003 | 1.069 |
older_siblings4 | 1.01 | 0.9687 | 1.055 |
older_siblings5P | 0.9457 | 0.8945 | 0.9999 |
nr.siblings | 1.04 | 1.034 | 1.045 |
last_born1 | 1.01 | 1.001 | 1.019 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.9 | [1.87;1.93] | [1.88;1.92] |
estimate father 35y | 1.81 | [1.77;1.84] | [1.79;1.83] |
percentage change | -4.8 | [-7.2;-2.42] | [-6.44;-3.18] |
OR/IRR | 0.95 | [0.93;0.98] | [0.94;0.97] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r8_adjust_for_first_born_adult.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
In our main model, we control for birth cohort in 5-year-bins (lumping small bins). We chose to do so, because nonlinear and even sharply spiking effects of birth cohort are plausible (due to e.g. epidemics). This decision may be disputed, as it summarises 5-year-bins. Here, we instead allow for a thin-splate spline on the continuous birth year variable. This allows for smooth nonlinear (but not spiking) birth cohort effects.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 31 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + s(byear) + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 1500; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
## sds ~ student_t(3, 0, 10)
##
## Smooth Terms:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sds(sbyear_1) 0.03 0.02 0 0.09 738 1
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.02 251 1.01
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.73 0.01 0.70 0.76 3000
## paternalage -0.05 0.01 -0.08 -0.02 620
## male1 -0.07 0.00 -0.07 -0.06 3000
## maternalage.factor1420 0.07 0.01 0.05 0.08 3000
## maternalage.factor3561 -0.01 0.01 -0.02 0.01 3000
## paternalage.mean 0.00 0.01 -0.03 0.03 622
## paternal_loss01 0.10 0.23 -0.36 0.51 3000
## paternal_loss15 -0.02 0.06 -0.14 0.10 3000
## paternal_loss510 -0.05 0.03 -0.11 0.00 3000
## paternal_loss1015 -0.02 0.02 -0.05 0.02 3000
## paternal_loss1520 0.00 0.01 -0.02 0.03 3000
## paternal_loss2025 0.00 0.01 -0.03 0.02 3000
## paternal_loss2530 -0.01 0.01 -0.03 0.01 3000
## paternal_loss3035 0.00 0.01 -0.02 0.01 3000
## paternal_loss3540 -0.01 0.01 -0.02 0.01 3000
## paternal_loss4045 -0.01 0.01 -0.03 0.00 3000
## paternal_lossunclear -0.06 0.01 -0.07 -0.04 3000
## maternal_loss01 -0.26 0.21 -0.68 0.14 3000
## maternal_loss15 -0.06 0.09 -0.25 0.11 3000
## maternal_loss510 0.01 0.04 -0.07 0.09 3000
## maternal_loss1015 -0.02 0.03 -0.07 0.04 3000
## maternal_loss1520 -0.01 0.02 -0.05 0.03 3000
## maternal_loss2025 0.02 0.02 -0.01 0.06 3000
## maternal_loss2530 0.00 0.01 -0.03 0.03 3000
## maternal_loss3035 0.00 0.01 -0.02 0.02 3000
## maternal_loss3540 0.00 0.01 -0.01 0.02 3000
## maternal_loss4045 0.00 0.01 -0.02 0.01 3000
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 3000
## older_siblings1 0.02 0.01 0.00 0.03 746
## older_siblings2 0.02 0.01 0.00 0.05 618
## older_siblings3 0.03 0.02 0.00 0.07 676
## older_siblings4 0.01 0.02 -0.04 0.05 669
## older_siblings5P -0.06 0.03 -0.12 0.00 683
## nr.siblings 0.04 0.00 0.03 0.04 646
## last_born1 0.01 0.00 0.00 0.02 3000
## sbyear_1 -0.01 0.01 -0.03 0.02 1042
## Rhat
## Intercept 1.00
## paternalage 1.01
## male1 1.00
## maternalage.factor1420 1.00
## maternalage.factor3561 1.00
## paternalage.mean 1.01
## paternal_loss01 1.00
## paternal_loss15 1.00
## paternal_loss510 1.00
## paternal_loss1015 1.00
## paternal_loss1520 1.00
## paternal_loss2025 1.00
## paternal_loss2530 1.00
## paternal_loss3035 1.00
## paternal_loss3540 1.00
## paternal_loss4045 1.00
## paternal_lossunclear 1.00
## maternal_loss01 1.00
## maternal_loss15 1.00
## maternal_loss510 1.00
## maternal_loss1015 1.00
## maternal_loss1520 1.00
## maternal_loss2025 1.00
## maternal_loss2530 1.00
## maternal_loss3035 1.00
## maternal_loss3540 1.00
## maternal_loss4045 1.00
## maternal_lossunclear 1.00
## older_siblings1 1.01
## older_siblings2 1.01
## older_siblings3 1.01
## older_siblings4 1.01
## older_siblings5P 1.01
## nr.siblings 1.01
## last_born1 1.00
## sbyear_1 1.01
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.07 | 2.012 | 2.128 |
paternalage | 0.9538 | 0.9276 | 0.9802 |
male1 | 0.9364 | 0.9287 | 0.9442 |
maternalage.factor1420 | 1.071 | 1.055 | 1.087 |
maternalage.factor3561 | 0.9935 | 0.9792 | 1.008 |
paternalage.mean | 1.002 | 0.9736 | 1.029 |
paternal_loss01 | 1.101 | 0.6954 | 1.666 |
paternal_loss15 | 0.9846 | 0.8717 | 1.107 |
paternal_loss510 | 0.9491 | 0.8977 | 1.004 |
paternal_loss1015 | 0.9823 | 0.9486 | 1.017 |
paternal_loss1520 | 1.004 | 0.9768 | 1.031 |
paternal_loss2025 | 0.9963 | 0.9751 | 1.019 |
paternal_loss2530 | 0.9873 | 0.9689 | 1.006 |
paternal_loss3035 | 0.9954 | 0.9793 | 1.011 |
paternal_loss3540 | 0.9948 | 0.9803 | 1.01 |
paternal_loss4045 | 0.9866 | 0.9721 | 1.001 |
paternal_lossunclear | 0.9464 | 0.936 | 0.9568 |
maternal_loss01 | 0.7718 | 0.5069 | 1.145 |
maternal_loss15 | 0.9378 | 0.7772 | 1.121 |
maternal_loss510 | 1.01 | 0.9286 | 1.094 |
maternal_loss1015 | 0.9841 | 0.933 | 1.037 |
maternal_loss1520 | 0.9927 | 0.9554 | 1.029 |
maternal_loss2025 | 1.025 | 0.9919 | 1.059 |
maternal_loss2530 | 1 | 0.9736 | 1.028 |
maternal_loss3035 | 1.001 | 0.9777 | 1.024 |
maternal_loss3540 | 1.005 | 0.986 | 1.024 |
maternal_loss4045 | 0.9962 | 0.9779 | 1.014 |
maternal_lossunclear | 0.9817 | 0.972 | 0.9918 |
older_siblings1 | 1.017 | 1.003 | 1.03 |
older_siblings2 | 1.024 | 1.001 | 1.048 |
older_siblings3 | 1.034 | 1.001 | 1.068 |
older_siblings4 | 1.008 | 0.9629 | 1.053 |
older_siblings5P | 0.9438 | 0.8912 | 1.002 |
nr.siblings | 1.04 | 1.034 | 1.045 |
last_born1 | 1.011 | 1.002 | 1.02 |
sbyear_1 | 0.9933 | 0.9662 | 1.015 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.92 | [1.89;1.95] | [1.9;1.94] |
estimate father 35y | 1.83 | [1.79;1.87] | [1.81;1.86] |
percentage change | -4.6 | [-7.24;-1.98] | [-6.31;-2.93] |
OR/IRR | 0.95 | [0.93;0.98] | [0.94;0.97] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r9_continuous_byear_adjustment.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
Paternal age effects may vary between different families. Although we did not explore between-family moderators of paternal age effects in our study, we tested whether modelling an additional group-level slope for paternal age differences within the family, would change the results by allowing for shrinkage and to examine the amount of inter-family differences to be explained for potential future moderator analysis.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 164 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 + paternalage | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 3500; warmup = 1500; thin = 5;
## total post-warmup samples = 2400
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## L ~ lkj_corr_cholesky(1)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## sd(Intercept) 0.02 0.01 0.00 0.05 310
## sd(paternalage) 0.01 0.00 0.00 0.02 175
## cor(Intercept,paternalage) -0.38 0.55 -0.99 0.86 494
## Rhat
## sd(Intercept) 1.02
## sd(paternalage) 1.04
## cor(Intercept,paternalage) 1.01
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.73 0.01 0.70 0.76 1802
## paternalage -0.05 0.01 -0.07 -0.02 1291
## birth_cohort1950M1955 0.00 0.01 -0.02 0.01 1106
## birth_cohort1955M1960 0.00 0.01 -0.01 0.01 1265
## male1 -0.07 0.00 -0.07 -0.06 808
## maternalage.factor1420 0.07 0.01 0.05 0.08 843
## maternalage.factor3561 -0.01 0.01 -0.02 0.01 1809
## paternalage.mean 0.00 0.01 -0.03 0.03 1264
## paternal_loss01 0.08 0.22 -0.35 0.47 1611
## paternal_loss15 -0.02 0.06 -0.14 0.09 1591
## paternal_loss510 -0.05 0.03 -0.11 0.01 1564
## paternal_loss1015 -0.02 0.02 -0.05 0.02 1532
## paternal_loss1520 0.00 0.01 -0.02 0.03 1550
## paternal_loss2025 0.00 0.01 -0.02 0.02 1680
## paternal_loss2530 -0.01 0.01 -0.03 0.00 1188
## paternal_loss3035 0.00 0.01 -0.02 0.01 977
## paternal_loss3540 -0.01 0.01 -0.02 0.01 1845
## paternal_loss4045 -0.01 0.01 -0.03 0.00 1666
## paternal_lossunclear -0.06 0.01 -0.07 -0.04 2039
## maternal_loss01 -0.26 0.21 -0.69 0.15 1130
## maternal_loss15 -0.07 0.09 -0.24 0.10 1841
## maternal_loss510 0.01 0.04 -0.07 0.09 2239
## maternal_loss1015 -0.02 0.03 -0.07 0.04 1545
## maternal_loss1520 -0.01 0.02 -0.05 0.03 1669
## maternal_loss2025 0.03 0.02 -0.01 0.06 1895
## maternal_loss2530 0.00 0.01 -0.03 0.03 1836
## maternal_loss3035 0.00 0.01 -0.02 0.02 1839
## maternal_loss3540 0.00 0.01 -0.01 0.02 1912
## maternal_loss4045 0.00 0.01 -0.02 0.01 1032
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 856
## older_siblings1 0.02 0.01 0.00 0.03 907
## older_siblings2 0.02 0.01 0.00 0.05 1082
## older_siblings3 0.03 0.02 0.00 0.07 1184
## older_siblings4 0.01 0.02 -0.04 0.05 1469
## older_siblings5P -0.06 0.03 -0.11 0.00 1450
## nr.siblings 0.04 0.00 0.03 0.04 1432
## last_born1 0.01 0.00 0.00 0.02 1674
## Rhat
## Intercept 1.00
## paternalage 1.00
## birth_cohort1950M1955 1.00
## birth_cohort1955M1960 1.00
## male1 1.00
## maternalage.factor1420 1.01
## maternalage.factor3561 1.00
## paternalage.mean 1.00
## paternal_loss01 1.00
## paternal_loss15 1.00
## paternal_loss510 1.00
## paternal_loss1015 1.00
## paternal_loss1520 1.00
## paternal_loss2025 1.00
## paternal_loss2530 1.00
## paternal_loss3035 1.00
## paternal_loss3540 1.00
## paternal_loss4045 1.00
## paternal_lossunclear 1.00
## maternal_loss01 1.00
## maternal_loss15 1.00
## maternal_loss510 1.00
## maternal_loss1015 1.00
## maternal_loss1520 1.00
## maternal_loss2025 1.00
## maternal_loss2530 1.00
## maternal_loss3035 1.00
## maternal_loss3540 1.00
## maternal_loss4045 1.00
## maternal_lossunclear 1.00
## older_siblings1 1.01
## older_siblings2 1.01
## older_siblings3 1.00
## older_siblings4 1.00
## older_siblings5P 1.00
## nr.siblings 1.00
## last_born1 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.076 | 2.021 | 2.138 |
paternalage | 0.9538 | 0.9285 | 0.9789 |
birth_cohort1950M1955 | 0.9953 | 0.9847 | 1.006 |
birth_cohort1955M1960 | 0.9975 | 0.9861 | 1.009 |
male1 | 0.9366 | 0.9288 | 0.9445 |
maternalage.factor1420 | 1.071 | 1.055 | 1.087 |
maternalage.factor3561 | 0.9935 | 0.9789 | 1.009 |
paternalage.mean | 1.001 | 0.9747 | 1.029 |
paternal_loss01 | 1.084 | 0.7042 | 1.596 |
paternal_loss15 | 0.9838 | 0.8732 | 1.1 |
paternal_loss510 | 0.951 | 0.8977 | 1.007 |
paternal_loss1015 | 0.9819 | 0.9483 | 1.016 |
paternal_loss1520 | 1.004 | 0.9771 | 1.031 |
paternal_loss2025 | 0.996 | 0.9757 | 1.017 |
paternal_loss2530 | 0.9869 | 0.9697 | 1.005 |
paternal_loss3035 | 0.9951 | 0.979 | 1.013 |
paternal_loss3540 | 0.9944 | 0.9798 | 1.009 |
paternal_loss4045 | 0.9864 | 0.9714 | 1.001 |
paternal_lossunclear | 0.946 | 0.9356 | 0.956 |
maternal_loss01 | 0.7741 | 0.5016 | 1.166 |
maternal_loss15 | 0.9369 | 0.7835 | 1.11 |
maternal_loss510 | 1.012 | 0.9317 | 1.097 |
maternal_loss1015 | 0.9843 | 0.9316 | 1.04 |
maternal_loss1520 | 0.9922 | 0.9548 | 1.032 |
maternal_loss2025 | 1.025 | 0.9928 | 1.058 |
maternal_loss2530 | 1 | 0.9725 | 1.029 |
maternal_loss3035 | 1.001 | 0.9785 | 1.025 |
maternal_loss3540 | 1.005 | 0.9854 | 1.024 |
maternal_loss4045 | 0.9961 | 0.9792 | 1.014 |
maternal_lossunclear | 0.9815 | 0.9715 | 0.9913 |
older_siblings1 | 1.017 | 1.003 | 1.031 |
older_siblings2 | 1.023 | 1.001 | 1.047 |
older_siblings3 | 1.033 | 1.001 | 1.067 |
older_siblings4 | 1.008 | 0.9641 | 1.051 |
older_siblings5P | 0.9437 | 0.8923 | 0.9971 |
nr.siblings | 1.04 | 1.035 | 1.045 |
last_born1 | 1.011 | 1.002 | 1.02 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.93 | [1.9;1.96] | [1.91;1.95] |
estimate father 35y | 1.84 | [1.8;1.88] | [1.81;1.86] |
percentage change | -4.57 | [-7.15;-2.11] | [-6.31;-2.92] |
OR/IRR | 0.95 | [0.93;0.98] | [0.94;0.97] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r10_add_random_slope.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
Most anchors in our sample are full biological siblings and especially in the historical populations, divorce and remarriage was rare. Therefore, we chose to include only one group-level effect, for the parent couple (i.e. one group-level effect per father-mother-dyad). Including one intercept per parent is potentially a better way to adjust for genetic propensities inherited from either parent and allows estimating this propensity also from half-siblings, while half-sibling relationships were ignored in our main models. This comes at the cost of modelling complexity.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 1 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idMere) + (1 | idPere)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 10000; warmup = 4000; thin = 5;
## total post-warmup samples = 7200
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idMere (Number of levels: 79781)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.03 3611 1
##
## ~idPere (Number of levels: 79757)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.03 3490 1
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.73 0.01 0.70 0.76 7200
## paternalage -0.05 0.01 -0.07 -0.02 6992
## birth_cohort1950M1955 0.00 0.01 -0.02 0.01 7200
## birth_cohort1955M1960 0.00 0.01 -0.01 0.01 7200
## male1 -0.07 0.00 -0.07 -0.06 7200
## maternalage.factor1420 0.07 0.01 0.05 0.08 7200
## maternalage.factor3561 -0.01 0.01 -0.02 0.01 6991
## paternalage.mean 0.00 0.01 -0.03 0.03 6813
## paternal_loss01 0.09 0.22 -0.37 0.50 7200
## paternal_loss15 -0.02 0.06 -0.13 0.10 7200
## paternal_loss510 -0.05 0.03 -0.11 0.00 7200
## paternal_loss1015 -0.02 0.02 -0.05 0.02 7200
## paternal_loss1520 0.00 0.01 -0.02 0.03 7200
## paternal_loss2025 0.00 0.01 -0.03 0.02 7200
## paternal_loss2530 -0.01 0.01 -0.03 0.01 7200
## paternal_loss3035 0.00 0.01 -0.02 0.01 7200
## paternal_loss3540 -0.01 0.01 -0.02 0.01 7200
## paternal_loss4045 -0.01 0.01 -0.03 0.00 7200
## paternal_lossunclear -0.06 0.01 -0.07 -0.04 7200
## maternal_loss01 -0.26 0.21 -0.69 0.12 7200
## maternal_loss15 -0.07 0.09 -0.25 0.12 7200
## maternal_loss510 0.01 0.04 -0.07 0.09 7200
## maternal_loss1015 -0.02 0.03 -0.07 0.04 7200
## maternal_loss1520 -0.01 0.02 -0.05 0.03 7200
## maternal_loss2025 0.02 0.02 -0.01 0.06 7200
## maternal_loss2530 0.00 0.01 -0.03 0.03 7200
## maternal_loss3035 0.00 0.01 -0.02 0.02 7200
## maternal_loss3540 0.00 0.01 -0.01 0.02 7200
## maternal_loss4045 0.00 0.01 -0.02 0.01 7200
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 6713
## older_siblings1 0.02 0.01 0.00 0.03 6995
## older_siblings2 0.02 0.01 0.00 0.05 6885
## older_siblings3 0.03 0.02 0.00 0.07 6733
## older_siblings4 0.01 0.02 -0.04 0.05 6877
## older_siblings5P -0.06 0.03 -0.12 0.00 6949
## nr.siblings 0.04 0.00 0.03 0.04 6765
## last_born1 0.01 0.00 0.00 0.02 7200
## Rhat
## Intercept 1
## paternalage 1
## birth_cohort1950M1955 1
## birth_cohort1955M1960 1
## male1 1
## maternalage.factor1420 1
## maternalage.factor3561 1
## paternalage.mean 1
## paternal_loss01 1
## paternal_loss15 1
## paternal_loss510 1
## paternal_loss1015 1
## paternal_loss1520 1
## paternal_loss2025 1
## paternal_loss2530 1
## paternal_loss3035 1
## paternal_loss3540 1
## paternal_loss4045 1
## paternal_lossunclear 1
## maternal_loss01 1
## maternal_loss15 1
## maternal_loss510 1
## maternal_loss1015 1
## maternal_loss1520 1
## maternal_loss2025 1
## maternal_loss2530 1
## maternal_loss3035 1
## maternal_loss3540 1
## maternal_loss4045 1
## maternal_lossunclear 1
## older_siblings1 1
## older_siblings2 1
## older_siblings3 1
## older_siblings4 1
## older_siblings5P 1
## nr.siblings 1
## last_born1 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.076 | 2.016 | 2.137 |
paternalage | 0.9539 | 0.9286 | 0.981 |
birth_cohort1950M1955 | 0.9951 | 0.9847 | 1.006 |
birth_cohort1955M1960 | 0.9975 | 0.9863 | 1.008 |
male1 | 0.9364 | 0.9289 | 0.944 |
maternalage.factor1420 | 1.071 | 1.055 | 1.087 |
maternalage.factor3561 | 0.9935 | 0.9793 | 1.008 |
paternalage.mean | 1.001 | 0.9743 | 1.029 |
paternal_loss01 | 1.094 | 0.6919 | 1.645 |
paternal_loss15 | 0.9828 | 0.8758 | 1.101 |
paternal_loss510 | 0.9494 | 0.8963 | 1.004 |
paternal_loss1015 | 0.9818 | 0.9481 | 1.016 |
paternal_loss1520 | 1.004 | 0.9786 | 1.031 |
paternal_loss2025 | 0.9959 | 0.975 | 1.018 |
paternal_loss2530 | 0.987 | 0.9689 | 1.006 |
paternal_loss3035 | 0.9951 | 0.9788 | 1.011 |
paternal_loss3540 | 0.9946 | 0.98 | 1.01 |
paternal_loss4045 | 0.9865 | 0.9718 | 1.001 |
paternal_lossunclear | 0.9459 | 0.9356 | 0.9565 |
maternal_loss01 | 0.768 | 0.5013 | 1.128 |
maternal_loss15 | 0.9354 | 0.7768 | 1.123 |
maternal_loss510 | 1.011 | 0.9278 | 1.097 |
maternal_loss1015 | 0.9849 | 0.9347 | 1.039 |
maternal_loss1520 | 0.993 | 0.9548 | 1.033 |
maternal_loss2025 | 1.025 | 0.9931 | 1.059 |
maternal_loss2530 | 1.001 | 0.9729 | 1.029 |
maternal_loss3035 | 1.001 | 0.9776 | 1.024 |
maternal_loss3540 | 1.005 | 0.9858 | 1.025 |
maternal_loss4045 | 0.9961 | 0.9779 | 1.014 |
maternal_lossunclear | 0.9816 | 0.9718 | 0.9919 |
older_siblings1 | 1.017 | 1.003 | 1.03 |
older_siblings2 | 1.024 | 1 | 1.047 |
older_siblings3 | 1.033 | 0.9997 | 1.068 |
older_siblings4 | 1.008 | 0.9636 | 1.054 |
older_siblings5P | 0.9437 | 0.8912 | 0.999 |
nr.siblings | 1.04 | 1.034 | 1.046 |
last_born1 | 1.011 | 1.002 | 1.02 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.93 | [1.89;1.96] | [1.91;1.95] |
estimate father 35y | 1.84 | [1.8;1.88] | [1.81;1.86] |
percentage change | -4.61 | [-7.14;-1.9] | [-6.28;-2.91] |
OR/IRR | 0.95 | [0.93;0.98] | [0.94;0.97] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r11_separate_random_effects_for_parents.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
It need not be the case that paternal age has the same effect on male and female children. For example, male children inherit only the small Y chromosome from the father, but female children inherit the larger X chromosome, so that paternal age predicts X-chromosomal de novo mutations in females but not in males (Francioli et al., 2016). At the same time, the autism literature suggests that males are less robust to heritable and de novo autism risk variants and that these effects are not simply due to having only one X chromosome (Werling & Geschwind, 2015). Here we let a dummy variable for being male moderate the paternal age effect.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 7 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage * male + birth_cohort + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 1500; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.03 376 1.02
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.71 0.02 0.67 0.74 3000
## paternalage -0.04 0.01 -0.07 -0.01 1652
## male1 -0.02 0.02 -0.06 0.02 3000
## birth_cohort1950M1955 0.00 0.01 -0.02 0.01 3000
## birth_cohort1955M1960 0.00 0.01 -0.01 0.01 3000
## maternalage.factor1420 0.07 0.01 0.05 0.08 3000
## maternalage.factor3561 -0.01 0.01 -0.02 0.01 3000
## paternalage.mean 0.00 0.01 -0.03 0.03 1528
## paternal_loss01 0.09 0.22 -0.39 0.50 3000
## paternal_loss15 -0.02 0.06 -0.13 0.10 3000
## paternal_loss510 -0.05 0.03 -0.11 0.00 3000
## paternal_loss1015 -0.02 0.02 -0.05 0.02 3000
## paternal_loss1520 0.00 0.01 -0.02 0.03 3000
## paternal_loss2025 0.00 0.01 -0.03 0.02 3000
## paternal_loss2530 -0.01 0.01 -0.03 0.01 3000
## paternal_loss3035 -0.01 0.01 -0.02 0.01 3000
## paternal_loss3540 -0.01 0.01 -0.02 0.01 3000
## paternal_loss4045 -0.01 0.01 -0.03 0.00 3000
## paternal_lossunclear -0.06 0.01 -0.07 -0.04 3000
## maternal_loss01 -0.26 0.21 -0.69 0.13 3000
## maternal_loss15 -0.07 0.09 -0.25 0.12 3000
## maternal_loss510 0.01 0.04 -0.08 0.09 3000
## maternal_loss1015 -0.02 0.03 -0.07 0.04 3000
## maternal_loss1520 -0.01 0.02 -0.05 0.03 3000
## maternal_loss2025 0.02 0.02 -0.01 0.06 3000
## maternal_loss2530 0.00 0.01 -0.03 0.03 3000
## maternal_loss3035 0.00 0.01 -0.02 0.02 3000
## maternal_loss3540 0.00 0.01 -0.02 0.02 3000
## maternal_loss4045 0.00 0.01 -0.02 0.01 3000
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 3000
## older_siblings1 0.02 0.01 0.00 0.03 1931
## older_siblings2 0.02 0.01 0.00 0.05 1521
## older_siblings3 0.03 0.02 0.00 0.07 1661
## older_siblings4 0.01 0.02 -0.04 0.05 1693
## older_siblings5P -0.06 0.03 -0.12 0.00 1644
## nr.siblings 0.04 0.00 0.03 0.04 1695
## last_born1 0.01 0.00 0.00 0.02 3000
## paternalage:male1 -0.01 0.01 -0.03 0.00 3000
## Rhat
## Intercept 1
## paternalage 1
## male1 1
## birth_cohort1950M1955 1
## birth_cohort1955M1960 1
## maternalage.factor1420 1
## maternalage.factor3561 1
## paternalage.mean 1
## paternal_loss01 1
## paternal_loss15 1
## paternal_loss510 1
## paternal_loss1015 1
## paternal_loss1520 1
## paternal_loss2025 1
## paternal_loss2530 1
## paternal_loss3035 1
## paternal_loss3540 1
## paternal_loss4045 1
## paternal_lossunclear 1
## maternal_loss01 1
## maternal_loss15 1
## maternal_loss510 1
## maternal_loss1015 1
## maternal_loss1520 1
## maternal_loss2025 1
## maternal_loss2530 1
## maternal_loss3035 1
## maternal_loss3540 1
## maternal_loss4045 1
## maternal_lossunclear 1
## older_siblings1 1
## older_siblings2 1
## older_siblings3 1
## older_siblings4 1
## older_siblings5P 1
## nr.siblings 1
## last_born1 1
## paternalage:male1 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.03 | 1.963 | 2.1 |
paternalage | 0.9608 | 0.9339 | 0.9882 |
male1 | 0.9787 | 0.9428 | 1.016 |
birth_cohort1950M1955 | 0.9952 | 0.9849 | 1.005 |
birth_cohort1955M1960 | 0.9976 | 0.9867 | 1.008 |
maternalage.factor1420 | 1.071 | 1.055 | 1.087 |
maternalage.factor3561 | 0.9933 | 0.9788 | 1.009 |
paternalage.mean | 1.001 | 0.9741 | 1.029 |
paternal_loss01 | 1.094 | 0.6797 | 1.656 |
paternal_loss15 | 0.983 | 0.8753 | 1.104 |
paternal_loss510 | 0.9498 | 0.8965 | 1.004 |
paternal_loss1015 | 0.9823 | 0.947 | 1.019 |
paternal_loss1520 | 1.004 | 0.9787 | 1.03 |
paternal_loss2025 | 0.9958 | 0.9735 | 1.019 |
paternal_loss2530 | 0.987 | 0.9684 | 1.006 |
paternal_loss3035 | 0.9949 | 0.9794 | 1.011 |
paternal_loss3540 | 0.9946 | 0.9806 | 1.009 |
paternal_loss4045 | 0.9865 | 0.9713 | 1.001 |
paternal_lossunclear | 0.9459 | 0.9348 | 0.9568 |
maternal_loss01 | 0.7707 | 0.5029 | 1.139 |
maternal_loss15 | 0.9361 | 0.7784 | 1.123 |
maternal_loss510 | 1.011 | 0.9267 | 1.097 |
maternal_loss1015 | 0.9844 | 0.934 | 1.04 |
maternal_loss1520 | 0.9927 | 0.9537 | 1.031 |
maternal_loss2025 | 1.025 | 0.9921 | 1.061 |
maternal_loss2530 | 1 | 0.972 | 1.029 |
maternal_loss3035 | 1.001 | 0.9787 | 1.024 |
maternal_loss3540 | 1.005 | 0.9851 | 1.024 |
maternal_loss4045 | 0.996 | 0.9779 | 1.014 |
maternal_lossunclear | 0.9816 | 0.972 | 0.9916 |
older_siblings1 | 1.016 | 1.003 | 1.03 |
older_siblings2 | 1.023 | 0.9994 | 1.047 |
older_siblings3 | 1.033 | 1 | 1.068 |
older_siblings4 | 1.008 | 0.9632 | 1.053 |
older_siblings5P | 0.9438 | 0.8899 | 1 |
nr.siblings | 1.04 | 1.034 | 1.046 |
last_born1 | 1.011 | 1.001 | 1.02 |
paternalage:male1 | 0.9861 | 0.9746 | 0.9977 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.92 | [1.88;1.95] | [1.9;1.94] |
estimate father 35y | 1.84 | [1.81;1.88] | [1.82;1.87] |
percentage change | -3.94 | [-6.61;-1.18] | [-5.62;-2.1] |
OR/IRR | 0.96 | [0.93;0.99] | [0.94;0.98] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r12_sex_moderation.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
We already control for the average paternal age at which the children in a family were born. The mean is a more complete summary of the reproductive timing of the father than the age at first birth. However, far more literature has examined age at first birth and it has the advantage of never being censored (although we of course try to rule out censoring by choosing appropriate subsets). Therefore, we added age at first birth as a covariate in this model.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 4 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage_at_1st_sib + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 1500; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.02 301 1.03
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.75 0.02 0.72 0.78 3000
## paternalage -0.04 0.01 -0.07 -0.02 1013
## birth_cohort1950M1955 -0.01 0.01 -0.02 0.00 3000
## birth_cohort1955M1960 -0.01 0.01 -0.02 0.01 3000
## male1 -0.07 0.00 -0.07 -0.06 3000
## maternalage.factor1420 0.07 0.01 0.05 0.08 3000
## maternalage.factor3561 -0.01 0.01 -0.02 0.00 3000
## paternalage_at_1st_sib -0.07 0.01 -0.09 -0.05 3000
## paternalage.mean 0.06 0.02 0.03 0.09 1239
## paternal_loss01 0.09 0.22 -0.37 0.50 3000
## paternal_loss15 -0.02 0.06 -0.13 0.10 3000
## paternal_loss510 -0.05 0.03 -0.11 0.00 3000
## paternal_loss1015 -0.02 0.02 -0.05 0.02 3000
## paternal_loss1520 0.00 0.01 -0.02 0.03 3000
## paternal_loss2025 0.00 0.01 -0.03 0.02 3000
## paternal_loss2530 -0.01 0.01 -0.03 0.01 3000
## paternal_loss3035 -0.01 0.01 -0.02 0.01 3000
## paternal_loss3540 -0.01 0.01 -0.02 0.01 3000
## paternal_loss4045 -0.01 0.01 -0.03 0.00 3000
## paternal_lossunclear -0.05 0.01 -0.07 -0.04 3000
## maternal_loss01 -0.26 0.21 -0.69 0.13 3000
## maternal_loss15 -0.07 0.09 -0.25 0.11 3000
## maternal_loss510 0.01 0.04 -0.07 0.09 3000
## maternal_loss1015 -0.02 0.03 -0.07 0.04 3000
## maternal_loss1520 -0.01 0.02 -0.05 0.03 3000
## maternal_loss2025 0.02 0.02 -0.01 0.06 3000
## maternal_loss2530 0.00 0.01 -0.03 0.03 3000
## maternal_loss3035 0.00 0.01 -0.02 0.02 3000
## maternal_loss3540 0.00 0.01 -0.01 0.02 3000
## maternal_loss4045 0.00 0.01 -0.02 0.01 3000
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 3000
## older_siblings1 0.01 0.01 0.00 0.03 1300
## older_siblings2 0.02 0.01 0.00 0.04 1082
## older_siblings3 0.03 0.02 0.00 0.06 921
## older_siblings4 0.01 0.02 -0.04 0.05 1149
## older_siblings5P -0.05 0.03 -0.10 0.01 977
## nr.siblings 0.03 0.00 0.03 0.04 1136
## last_born1 0.01 0.00 0.00 0.02 3000
## Rhat
## Intercept 1.00
## paternalage 1.01
## birth_cohort1950M1955 1.00
## birth_cohort1955M1960 1.00
## male1 1.00
## maternalage.factor1420 1.00
## maternalage.factor3561 1.00
## paternalage_at_1st_sib 1.00
## paternalage.mean 1.01
## paternal_loss01 1.00
## paternal_loss15 1.00
## paternal_loss510 1.00
## paternal_loss1015 1.00
## paternal_loss1520 1.00
## paternal_loss2025 1.00
## paternal_loss2530 1.00
## paternal_loss3035 1.00
## paternal_loss3540 1.00
## paternal_loss4045 1.00
## paternal_lossunclear 1.00
## maternal_loss01 1.00
## maternal_loss15 1.00
## maternal_loss510 1.00
## maternal_loss1015 1.00
## maternal_loss1520 1.00
## maternal_loss2025 1.00
## maternal_loss2530 1.00
## maternal_loss3035 1.00
## maternal_loss3540 1.00
## maternal_loss4045 1.00
## maternal_lossunclear 1.00
## older_siblings1 1.01
## older_siblings2 1.01
## older_siblings3 1.01
## older_siblings4 1.01
## older_siblings5P 1.01
## nr.siblings 1.01
## last_born1 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.109 | 2.048 | 2.175 |
paternalage | 0.9585 | 0.9335 | 0.9835 |
birth_cohort1950M1955 | 0.9936 | 0.9828 | 1.004 |
birth_cohort1955M1960 | 0.9945 | 0.983 | 1.006 |
male1 | 0.9366 | 0.9288 | 0.9443 |
maternalage.factor1420 | 1.068 | 1.051 | 1.084 |
maternalage.factor3561 | 0.9897 | 0.9762 | 1.004 |
paternalage_at_1st_sib | 0.9344 | 0.9183 | 0.9519 |
paternalage.mean | 1.059 | 1.027 | 1.092 |
paternal_loss01 | 1.097 | 0.6918 | 1.648 |
paternal_loss15 | 0.9811 | 0.8759 | 1.101 |
paternal_loss510 | 0.947 | 0.8961 | 1.004 |
paternal_loss1015 | 0.9815 | 0.9479 | 1.016 |
paternal_loss1520 | 1.004 | 0.9787 | 1.032 |
paternal_loss2025 | 0.9957 | 0.9752 | 1.017 |
paternal_loss2530 | 0.987 | 0.969 | 1.005 |
paternal_loss3035 | 0.9948 | 0.9784 | 1.012 |
paternal_loss3540 | 0.9943 | 0.98 | 1.009 |
paternal_loss4045 | 0.986 | 0.9717 | 1.001 |
paternal_lossunclear | 0.9469 | 0.9362 | 0.9574 |
maternal_loss01 | 0.7686 | 0.5001 | 1.142 |
maternal_loss15 | 0.9361 | 0.7778 | 1.121 |
maternal_loss510 | 1.009 | 0.9304 | 1.096 |
maternal_loss1015 | 0.9845 | 0.9354 | 1.037 |
maternal_loss1520 | 0.991 | 0.9534 | 1.03 |
maternal_loss2025 | 1.024 | 0.9924 | 1.058 |
maternal_loss2530 | 0.9999 | 0.9736 | 1.026 |
maternal_loss3035 | 1 | 0.9772 | 1.024 |
maternal_loss3540 | 1.004 | 0.9852 | 1.024 |
maternal_loss4045 | 0.996 | 0.9777 | 1.015 |
maternal_lossunclear | 0.9821 | 0.9723 | 0.9919 |
older_siblings1 | 1.013 | 1 | 1.027 |
older_siblings2 | 1.019 | 0.9964 | 1.042 |
older_siblings3 | 1.029 | 0.9971 | 1.063 |
older_siblings4 | 1.007 | 0.9649 | 1.052 |
older_siblings5P | 0.9506 | 0.9004 | 1.006 |
nr.siblings | 1.033 | 1.027 | 1.039 |
last_born1 | 1.009 | 1 | 1.018 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.95 | [1.92;1.98] | [1.93;1.97] |
estimate father 35y | 1.87 | [1.83;1.91] | [1.84;1.89] |
percentage change | -4.15 | [-6.65;-1.65] | [-5.8;-2.52] |
OR/IRR | 0.96 | [0.93;0.98] | [0.94;0.97] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r13_control_paternal_afb.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
Most of the previous literature has not used multilevel modelling, but linear group fixed effects (essentially dummy variables on the many thousands of families in the model). We believe our multilevel modelling approach has the advantage of allowing us to examine the effect of including predictors at the level of the family in the same model.
This allows us to
a) appropriately model a zero-inflated outcome such as number of children including those who died young (we’re not aware of a linear group fixed effect approach that handles hurdle or zero-inflated models)
b) examine group-level slopes for paternal age and potentially to examine moderators at the level of the family (though we did not do this)
c) explicitly model confounders at the level of the family (e.g. number of siblings).
Nevertheless, the prevalence of this approach in the literature mandates that we show how our approach compares. We fit this model using the R package “lfe” and the function felm. All covariates that were not estimable in principle were removed (i.e. number of siblings, paternalage.mean).
##
## Call:
## felm(formula = children ~ paternalage + birth_cohort + male + maternalage.factor + paternal_loss + maternal_loss + older_siblings + last_born | idParents, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.554 -0.094 0.000 0.098 10.556
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## paternalage -0.049562 0.057658 -0.86 0.3900
## birth_cohort1950-1955 0.021454 0.021372 1.00 0.3155
## birth_cohort1955-1960 0.053001 0.038154 1.39 0.1648
## male1 -0.124342 0.011099 -11.20 <2e-16 ***
## maternalage.factor(14,20] 0.031807 0.025515 1.25 0.2125
## maternalage.factor(35,61] -0.014914 0.026926 -0.55 0.5797
## paternal_loss[0,1] 1.766907 0.853856 2.07 0.0385 *
## paternal_loss(1,5] 0.113027 0.235945 0.48 0.6319
## paternal_loss(5,10] -0.288204 0.152810 -1.89 0.0593 .
## paternal_loss(10,15] -0.215138 0.121585 -1.77 0.0768 .
## paternal_loss(15,20] -0.069973 0.098897 -0.71 0.4792
## paternal_loss(20,25] -0.020554 0.081351 -0.25 0.8005
## paternal_loss(25,30] -0.055293 0.066102 -0.84 0.4029
## paternal_loss(30,35] -0.057376 0.051661 -1.11 0.2667
## paternal_loss(35,40] -0.044389 0.037796 -1.17 0.2402
## paternal_loss(40,45] -0.042005 0.027412 -1.53 0.1254
## paternal_lossunclear NA NA NA NA
## maternal_loss[0,1] -1.800020 1.254087 -1.44 0.1512
## maternal_loss(1,5] -0.387045 0.340402 -1.14 0.2555
## maternal_loss(5,10] -0.116220 0.204045 -0.57 0.5690
## maternal_loss(10,15] -0.432018 0.164399 -2.63 0.0086 **
## maternal_loss(15,20] -0.221439 0.133350 -1.66 0.0968 .
## maternal_loss(20,25] -0.222986 0.108839 -2.05 0.0405 *
## maternal_loss(25,30] -0.120767 0.086222 -1.40 0.1613
## maternal_loss(30,35] -0.056786 0.066186 -0.86 0.3909
## maternal_loss(35,40] -0.027403 0.046777 -0.59 0.5580
## maternal_loss(40,45] -0.027129 0.033004 -0.82 0.4111
## maternal_lossunclear NA NA NA NA
## older_siblings1 -0.028957 0.015712 -1.84 0.0653 .
## older_siblings2 -0.035176 0.027560 -1.28 0.2018
## older_siblings3 0.000161 0.040179 0.00 0.9968
## older_siblings4 -0.081790 0.054792 -1.49 0.1355
## older_siblings5+ -0.188366 0.073146 -2.58 0.0100 *
## last_born1 0.013869 0.009533 1.45 0.1457
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.21 on 47252 degrees of freedom
## Multiple R-squared(full model): 0.665 Adjusted R-squared: 0.0981
## Multiple R-squared(proj model): 0.00452 Adjusted R-squared: -1.68
## F-statistic(full model):1.17 on 80031 and 47252 DF, p-value: <2e-16
## F-statistic(proj model): 6.31 on 34 and 47252 DF, p-value: <2e-16
In this model we attempted allow for regional variation in paternal age effects and attempted to better control residual variation. Our approach was two-fold: to moderate paternal age by region and to add a random effect for the church parish in which the individual was born. However, for the modern Swedish data, we had no geographic data and no regional information, so this model was not fit.
Only in the DDB (historical Swedish data), parishes in some of the regions were still unlinked. This means that individuals could occur in more than one parish and not be linked. However, the region of Skellefteå was fully linked. Here, we test what happens when we restrict our dataset to Skellefteå.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 1 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 126969)
## Samples: 6 chains, each with iter = 800; warmup = 300; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 79823)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.02 355 1.02
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.72 0.02 0.69 0.75 3000
## paternalage -0.04 0.01 -0.07 -0.01 868
## birth_cohort1950M1955 0.00 0.01 -0.01 0.01 3000
## birth_cohort1955M1960 0.00 0.01 -0.01 0.01 3000
## male1 -0.07 0.00 -0.07 -0.06 3000
## maternalage.factor1420 0.07 0.01 0.05 0.09 3000
## maternalage.factor3561 0.01 0.01 -0.01 0.02 3000
## paternalage.mean 0.00 0.01 -0.03 0.03 920
## paternal_loss01 0.10 0.22 -0.35 0.50 3000
## paternal_loss15 -0.01 0.06 -0.13 0.10 3000
## paternal_loss510 -0.05 0.03 -0.10 0.01 3000
## paternal_loss1015 -0.02 0.02 -0.05 0.02 3000
## paternal_loss1520 0.01 0.01 -0.02 0.03 3000
## paternal_loss2025 0.00 0.01 -0.03 0.02 3000
## paternal_loss2530 -0.01 0.01 -0.03 0.01 3000
## paternal_loss3035 -0.01 0.01 -0.02 0.01 3000
## paternal_loss3540 -0.01 0.01 -0.02 0.01 3000
## paternal_loss4045 -0.01 0.01 -0.03 0.00 3000
## paternal_lossunclear -0.05 0.01 -0.07 -0.04 3000
## maternal_loss01 -0.26 0.21 -0.69 0.13 3000
## maternal_loss15 -0.07 0.09 -0.25 0.10 3000
## maternal_loss510 0.02 0.04 -0.07 0.10 3000
## maternal_loss1015 -0.02 0.03 -0.07 0.03 3000
## maternal_loss1520 0.00 0.02 -0.04 0.04 3000
## maternal_loss2025 0.03 0.02 -0.01 0.06 3000
## maternal_loss2530 0.00 0.01 -0.02 0.03 3000
## maternal_loss3035 0.00 0.01 -0.02 0.03 3000
## maternal_loss3540 0.01 0.01 -0.01 0.03 3000
## maternal_loss4045 0.00 0.01 -0.02 0.02 3000
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 3000
## older_siblings1 0.01 0.01 0.00 0.03 960
## older_siblings2 0.02 0.01 0.00 0.04 833
## older_siblings3 0.03 0.02 -0.01 0.06 868
## older_siblings4 0.00 0.02 -0.04 0.05 913
## older_siblings5P -0.06 0.03 -0.12 0.00 891
## nr.siblings 0.04 0.00 0.03 0.05 909
## last_born1 0.01 0.00 0.00 0.02 3000
## Rhat
## Intercept 1.00
## paternalage 1.01
## birth_cohort1950M1955 1.00
## birth_cohort1955M1960 1.00
## male1 1.00
## maternalage.factor1420 1.00
## maternalage.factor3561 1.00
## paternalage.mean 1.01
## paternal_loss01 1.00
## paternal_loss15 1.00
## paternal_loss510 1.00
## paternal_loss1015 1.00
## paternal_loss1520 1.00
## paternal_loss2025 1.00
## paternal_loss2530 1.00
## paternal_loss3035 1.00
## paternal_loss3540 1.00
## paternal_loss4045 1.00
## paternal_lossunclear 1.00
## maternal_loss01 1.00
## maternal_loss15 1.00
## maternal_loss510 1.00
## maternal_loss1015 1.00
## maternal_loss1520 1.00
## maternal_loss2025 1.00
## maternal_loss2530 1.00
## maternal_loss3035 1.00
## maternal_loss3540 1.00
## maternal_loss4045 1.00
## maternal_lossunclear 1.00
## older_siblings1 1.01
## older_siblings2 1.01
## older_siblings3 1.01
## older_siblings4 1.00
## older_siblings5P 1.01
## nr.siblings 1.01
## last_born1 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.057 | 1.996 | 2.119 |
paternalage | 0.9593 | 0.9334 | 0.9876 |
birth_cohort1950M1955 | 0.9954 | 0.9854 | 1.006 |
birth_cohort1955M1960 | 0.9975 | 0.9867 | 1.009 |
male1 | 0.937 | 0.9297 | 0.9446 |
maternalage.factor1420 | 1.072 | 1.056 | 1.089 |
maternalage.factor3561 | 1.007 | 0.9926 | 1.022 |
paternalage.mean | 0.9984 | 0.9709 | 1.026 |
paternal_loss01 | 1.101 | 0.7015 | 1.642 |
paternal_loss15 | 0.9853 | 0.8796 | 1.104 |
paternal_loss510 | 0.9549 | 0.9018 | 1.011 |
paternal_loss1015 | 0.9827 | 0.9484 | 1.018 |
paternal_loss1520 | 1.006 | 0.9805 | 1.033 |
paternal_loss2025 | 0.9967 | 0.975 | 1.019 |
paternal_loss2530 | 0.9881 | 0.9689 | 1.006 |
paternal_loss3035 | 0.9947 | 0.9788 | 1.011 |
paternal_loss3540 | 0.9944 | 0.98 | 1.009 |
paternal_loss4045 | 0.9871 | 0.9724 | 1.002 |
paternal_lossunclear | 0.9471 | 0.9365 | 0.9579 |
maternal_loss01 | 0.7685 | 0.501 | 1.133 |
maternal_loss15 | 0.9346 | 0.7819 | 1.104 |
maternal_loss510 | 1.017 | 0.9369 | 1.105 |
maternal_loss1015 | 0.9838 | 0.9331 | 1.035 |
maternal_loss1520 | 0.9965 | 0.9582 | 1.036 |
maternal_loss2025 | 1.027 | 0.993 | 1.061 |
maternal_loss2530 | 1.005 | 0.978 | 1.033 |
maternal_loss3035 | 1.003 | 0.9805 | 1.025 |
maternal_loss3540 | 1.008 | 0.9875 | 1.028 |
maternal_loss4045 | 0.9995 | 0.9817 | 1.017 |
maternal_lossunclear | 0.9826 | 0.9726 | 0.9924 |
older_siblings1 | 1.015 | 1 | 1.029 |
older_siblings2 | 1.02 | 0.9966 | 1.044 |
older_siblings3 | 1.029 | 0.9949 | 1.064 |
older_siblings4 | 1.004 | 0.96 | 1.052 |
older_siblings5P | 0.939 | 0.886 | 0.9961 |
nr.siblings | 1.04 | 1.035 | 1.046 |
last_born1 | 1.01 | 1.001 | 1.02 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.92 | [1.89;1.95] | [1.9;1.94] |
estimate father 35y | 1.84 | [1.81;1.88] | [1.82;1.87] |
percentage change | -4.09 | [-6.66;-1.24] | [-5.86;-2.26] |
OR/IRR | 0.96 | [0.93;0.99] | [0.94;0.98] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r17_simulate_downs.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
To make models computationally feasible and because early mortality was negligible, we fit the very large modern Swedish dataset with a poisson()
family distribution. All historical datasets had high early mortality, so we thought a hurdle_poisson()
was more appropriate. Here, we show what happens when we reverse this. The hurdle_poisson()
model can be fit to the modern Swedish data here, because we only use a subset.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
print(model_summary)
## Family: hurdle_poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## hu ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 1500; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
## b_hu ~ normal(0,5)
## sd_hu ~ student_t(3, 0, 10)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.00 0.00 0.00 0.01 1909 1.00
## sd(hu_Intercept) 0.91 0.02 0.87 0.96 453 1.01
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.58 0.02 0.55 0.62 3000
## paternalage -0.01 0.02 -0.04 0.02 679
## birth_cohort1950M1955 0.02 0.01 0.00 0.03 3000
## birth_cohort1955M1960 0.04 0.01 0.02 0.05 3000
## male1 0.03 0.00 0.02 0.04 3000
## maternalage.factor1420 0.04 0.01 0.02 0.06 3000
## maternalage.factor3561 0.00 0.01 -0.02 0.02 3000
## paternalage.mean 0.01 0.02 -0.02 0.04 670
## paternal_loss01 -0.17 0.28 -0.77 0.32 3000
## paternal_loss15 0.00 0.07 -0.15 0.13 3000
## paternal_loss510 -0.01 0.04 -0.08 0.06 3000
## paternal_loss1015 -0.01 0.02 -0.06 0.03 3000
## paternal_loss1520 0.02 0.02 -0.01 0.05 3000
## paternal_loss2025 -0.02 0.01 -0.04 0.01 3000
## paternal_loss2530 -0.01 0.01 -0.04 0.01 3000
## paternal_loss3035 0.00 0.01 -0.02 0.02 3000
## paternal_loss3540 -0.01 0.01 -0.03 0.01 3000
## paternal_loss4045 -0.01 0.01 -0.03 0.00 3000
## paternal_lossunclear 0.01 0.01 -0.01 0.02 3000
## maternal_loss01 -0.01 0.25 -0.53 0.46 3000
## maternal_loss15 -0.03 0.12 -0.26 0.19 3000
## maternal_loss510 0.00 0.05 -0.10 0.10 3000
## maternal_loss1015 0.01 0.03 -0.06 0.07 3000
## maternal_loss1520 0.00 0.02 -0.05 0.05 3000
## maternal_loss2025 0.03 0.02 -0.01 0.07 3000
## maternal_loss2530 0.02 0.02 -0.02 0.05 3000
## maternal_loss3035 0.02 0.01 -0.01 0.05 3000
## maternal_loss3540 0.01 0.01 -0.01 0.04 3000
## maternal_loss4045 0.00 0.01 -0.02 0.02 3000
## maternal_lossunclear 0.01 0.01 -0.01 0.02 3000
## older_siblings1 -0.01 0.01 -0.03 0.00 815
## older_siblings2 -0.02 0.01 -0.04 0.01 728
## older_siblings3 -0.01 0.02 -0.05 0.03 718
## older_siblings4 -0.04 0.03 -0.09 0.02 764
## older_siblings5P -0.12 0.03 -0.19 -0.05 748
## nr.siblings 0.04 0.00 0.03 0.05 731
## last_born1 0.00 0.01 -0.01 0.02 3000
## hu_Intercept -2.73 0.06 -2.84 -2.61 1950
## hu_paternalage 0.22 0.06 0.11 0.33 708
## hu_birth_cohort1950M1955 0.10 0.02 0.06 0.14 3000
## hu_birth_cohort1955M1960 0.16 0.02 0.12 0.20 2414
## hu_male1 0.48 0.02 0.45 0.51 3000
## hu_maternalage.factor1420 -0.24 0.03 -0.30 -0.18 3000
## hu_maternalage.factor3561 0.04 0.03 -0.02 0.10 3000
## hu_paternalage.mean 0.04 0.06 -0.07 0.15 733
## hu_paternal_loss01 -4.79 2.93 -11.75 -0.48 3000
## hu_paternal_loss15 0.07 0.22 -0.36 0.51 3000
## hu_paternal_loss510 0.22 0.11 0.02 0.44 3000
## hu_paternal_loss1015 0.05 0.07 -0.08 0.19 3000
## hu_paternal_loss1520 0.05 0.05 -0.05 0.15 3000
## hu_paternal_loss2025 -0.04 0.04 -0.13 0.05 3000
## hu_paternal_loss2530 0.02 0.04 -0.05 0.09 3000
## hu_paternal_loss3035 0.02 0.03 -0.04 0.09 3000
## hu_paternal_loss3540 0.00 0.03 -0.06 0.06 3000
## hu_paternal_loss4045 0.03 0.03 -0.03 0.09 3000
## hu_paternal_lossunclear 0.35 0.02 0.30 0.39 2232
## hu_maternal_loss01 1.14 0.62 -0.09 2.26 3000
## hu_maternal_loss15 0.26 0.34 -0.39 0.90 3000
## hu_maternal_loss510 -0.06 0.17 -0.40 0.26 3000
## hu_maternal_loss1015 0.13 0.10 -0.08 0.33 3000
## hu_maternal_loss1520 0.05 0.08 -0.11 0.20 3000
## hu_maternal_loss2025 -0.01 0.07 -0.14 0.12 3000
## hu_maternal_loss2530 0.06 0.05 -0.04 0.17 3000
## hu_maternal_loss3035 0.08 0.05 -0.01 0.17 3000
## hu_maternal_loss3540 0.03 0.04 -0.04 0.11 3000
## hu_maternal_loss4045 0.03 0.04 -0.05 0.10 3000
## hu_maternal_lossunclear 0.14 0.02 0.10 0.18 2524
## hu_older_siblings1 -0.12 0.03 -0.17 -0.07 840
## hu_older_siblings2 -0.17 0.05 -0.26 -0.08 735
## hu_older_siblings3 -0.21 0.07 -0.34 -0.07 720
## hu_older_siblings4 -0.13 0.09 -0.31 0.05 805
## hu_older_siblings5P -0.08 0.12 -0.31 0.14 733
## hu_nr.siblings -0.08 0.01 -0.10 -0.06 723
## hu_last_born1 -0.04 0.02 -0.07 -0.01 3000
## Rhat
## Intercept 1.00
## paternalage 1.01
## birth_cohort1950M1955 1.00
## birth_cohort1955M1960 1.00
## male1 1.00
## maternalage.factor1420 1.00
## maternalage.factor3561 1.00
## paternalage.mean 1.01
## paternal_loss01 1.00
## paternal_loss15 1.00
## paternal_loss510 1.00
## paternal_loss1015 1.00
## paternal_loss1520 1.00
## paternal_loss2025 1.00
## paternal_loss2530 1.00
## paternal_loss3035 1.00
## paternal_loss3540 1.00
## paternal_loss4045 1.00
## paternal_lossunclear 1.00
## maternal_loss01 1.00
## maternal_loss15 1.00
## maternal_loss510 1.00
## maternal_loss1015 1.00
## maternal_loss1520 1.00
## maternal_loss2025 1.00
## maternal_loss2530 1.00
## maternal_loss3035 1.00
## maternal_loss3540 1.00
## maternal_loss4045 1.00
## maternal_lossunclear 1.00
## older_siblings1 1.01
## older_siblings2 1.01
## older_siblings3 1.01
## older_siblings4 1.01
## older_siblings5P 1.01
## nr.siblings 1.01
## last_born1 1.00
## hu_Intercept 1.00
## hu_paternalage 1.00
## hu_birth_cohort1950M1955 1.00
## hu_birth_cohort1955M1960 1.00
## hu_male1 1.00
## hu_maternalage.factor1420 1.00
## hu_maternalage.factor3561 1.00
## hu_paternalage.mean 1.00
## hu_paternal_loss01 1.00
## hu_paternal_loss15 1.00
## hu_paternal_loss510 1.00
## hu_paternal_loss1015 1.00
## hu_paternal_loss1520 1.00
## hu_paternal_loss2025 1.00
## hu_paternal_loss2530 1.00
## hu_paternal_loss3035 1.00
## hu_paternal_loss3540 1.00
## hu_paternal_loss4045 1.00
## hu_paternal_lossunclear 1.00
## hu_maternal_loss01 1.00
## hu_maternal_loss15 1.00
## hu_maternal_loss510 1.00
## hu_maternal_loss1015 1.00
## hu_maternal_loss1520 1.00
## hu_maternal_loss2025 1.00
## hu_maternal_loss2530 1.00
## hu_maternal_loss3035 1.00
## hu_maternal_loss3540 1.00
## hu_maternal_loss4045 1.00
## hu_maternal_lossunclear 1.00
## hu_older_siblings1 1.00
## hu_older_siblings2 1.00
## hu_older_siblings3 1.00
## hu_older_siblings4 1.00
## hu_older_siblings5P 1.00
## hu_nr.siblings 1.00
## hu_last_born1 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 1.79 | 1.727 | 1.853 |
paternalage | 0.9892 | 0.9578 | 1.023 |
birth_cohort1950M1955 | 1.017 | 1.004 | 1.03 |
birth_cohort1955M1960 | 1.038 | 1.024 | 1.052 |
male1 | 1.026 | 1.016 | 1.036 |
maternalage.factor1420 | 1.04 | 1.021 | 1.059 |
maternalage.factor3561 | 1.002 | 0.9828 | 1.02 |
paternalage.mean | 1.011 | 0.9771 | 1.043 |
paternal_loss01 | 0.8427 | 0.4643 | 1.38 |
paternal_loss15 | 0.9966 | 0.8592 | 1.144 |
paternal_loss510 | 0.9896 | 0.9237 | 1.058 |
paternal_loss1015 | 0.987 | 0.9462 | 1.028 |
paternal_loss1520 | 1.021 | 0.9902 | 1.052 |
paternal_loss2025 | 0.9843 | 0.9591 | 1.01 |
paternal_loss2530 | 0.9859 | 0.9642 | 1.008 |
paternal_loss3035 | 0.9983 | 0.9792 | 1.018 |
paternal_loss3540 | 0.9919 | 0.9751 | 1.011 |
paternal_loss4045 | 0.9863 | 0.9683 | 1.004 |
paternal_lossunclear | 1.008 | 0.9948 | 1.022 |
maternal_loss01 | 0.9925 | 0.586 | 1.577 |
maternal_loss15 | 0.9737 | 0.7712 | 1.209 |
maternal_loss510 | 1 | 0.9012 | 1.105 |
maternal_loss1015 | 1.008 | 0.9437 | 1.075 |
maternal_loss1520 | 0.9986 | 0.9515 | 1.046 |
maternal_loss2025 | 1.032 | 0.9926 | 1.072 |
maternal_loss2530 | 1.018 | 0.9835 | 1.053 |
maternal_loss3035 | 1.02 | 0.992 | 1.05 |
maternal_loss3540 | 1.014 | 0.9903 | 1.039 |
maternal_loss4045 | 1.001 | 0.9785 | 1.023 |
maternal_lossunclear | 1.007 | 0.9945 | 1.019 |
older_siblings1 | 0.9886 | 0.9726 | 1.005 |
older_siblings2 | 0.9841 | 0.9576 | 1.011 |
older_siblings3 | 0.9894 | 0.9511 | 1.028 |
older_siblings4 | 0.9654 | 0.9145 | 1.017 |
older_siblings5P | 0.8876 | 0.8279 | 0.9506 |
nr.siblings | 1.041 | 1.034 | 1.047 |
last_born1 | 1.004 | 0.9928 | 1.015 |
hu_Intercept | 0.06541 | 0.05825 | 0.07377 |
hu_paternalage | 1.24 | 1.115 | 1.385 |
hu_birth_cohort1950M1955 | 1.106 | 1.062 | 1.154 |
hu_birth_cohort1955M1960 | 1.176 | 1.126 | 1.228 |
hu_male1 | 1.62 | 1.57 | 1.672 |
hu_maternalage.factor1420 | 0.7862 | 0.7392 | 0.8365 |
hu_maternalage.factor3561 | 1.041 | 0.9833 | 1.103 |
hu_paternalage.mean | 1.039 | 0.9331 | 1.161 |
hu_paternal_loss01 | 0.008336 | 7.876e-06 | 0.6176 |
hu_paternal_loss15 | 1.078 | 0.6944 | 1.664 |
hu_paternal_loss510 | 1.251 | 1.017 | 1.546 |
hu_paternal_loss1015 | 1.054 | 0.9222 | 1.21 |
hu_paternal_loss1520 | 1.051 | 0.9506 | 1.166 |
hu_paternal_loss2025 | 0.963 | 0.882 | 1.051 |
hu_paternal_loss2530 | 1.019 | 0.9479 | 1.096 |
hu_paternal_loss3035 | 1.023 | 0.9591 | 1.09 |
hu_paternal_loss3540 | 1.002 | 0.9434 | 1.062 |
hu_paternal_loss4045 | 1.029 | 0.9702 | 1.092 |
hu_paternal_lossunclear | 1.413 | 1.35 | 1.479 |
hu_maternal_loss01 | 3.118 | 0.913 | 9.613 |
hu_maternal_loss15 | 1.298 | 0.6761 | 2.456 |
hu_maternal_loss510 | 0.9401 | 0.6732 | 1.301 |
hu_maternal_loss1015 | 1.144 | 0.9245 | 1.395 |
hu_maternal_loss1520 | 1.049 | 0.8929 | 1.223 |
hu_maternal_loss2025 | 0.9948 | 0.8717 | 1.13 |
hu_maternal_loss2530 | 1.065 | 0.9563 | 1.187 |
hu_maternal_loss3035 | 1.084 | 0.9916 | 1.182 |
hu_maternal_loss3540 | 1.034 | 0.9562 | 1.118 |
hu_maternal_loss4045 | 1.028 | 0.9557 | 1.105 |
hu_maternal_lossunclear | 1.146 | 1.101 | 1.193 |
hu_older_siblings1 | 0.8855 | 0.8415 | 0.933 |
hu_older_siblings2 | 0.8424 | 0.7699 | 0.9218 |
hu_older_siblings3 | 0.8138 | 0.7136 | 0.9307 |
hu_older_siblings4 | 0.8783 | 0.7348 | 1.049 |
hu_older_siblings5P | 0.9196 | 0.7356 | 1.155 |
hu_nr.siblings | 0.924 | 0.9027 | 0.9452 |
hu_last_born1 | 0.9594 | 0.9282 | 0.9914 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.98 | [1.95;2.01] | [1.96;2] |
estimate father 35y | 1.92 | [1.88;1.96] | [1.9;1.94] |
percentage change | -3.14 | [-5.48;-0.71] | [-4.7;-1.53] |
OR/IRR | 0.99 | [0.96;1.02] | [0.97;1.01] |
OR hurdle | 1.24 | [1.12;1.38] | [1.15;1.33] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r18_hurdle_poisson.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
Previous analysts sometimes decided to use the normal distribution to predict (potentially zero-inflated) count data. Here, we refit our models using a normal distribution for the outcome. We show that estimates for the paternal age effect can be estimated to have a substantially different magnitude, because of this, but did not change direction.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
print(model_summary)
## Family: gaussian(identity)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 800; warmup = 300; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
## sigma ~ student_t(3, 0, 10)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.41 0.01 0.4 0.43 491 1.01
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 2.06 0.03 2.01 2.12 3000
## paternalage -0.07 0.02 -0.12 -0.03 916
## birth_cohort1950M1955 -0.01 0.01 -0.03 0.01 3000
## birth_cohort1955M1960 0.00 0.01 -0.02 0.02 3000
## male1 -0.12 0.01 -0.14 -0.11 3000
## maternalage.factor1420 0.12 0.01 0.10 0.15 3000
## maternalage.factor3561 -0.01 0.01 -0.04 0.01 3000
## paternalage.mean -0.01 0.02 -0.06 0.04 938
## paternal_loss01 0.29 0.40 -0.50 1.07 3000
## paternal_loss15 -0.01 0.10 -0.20 0.18 3000
## paternal_loss510 -0.09 0.05 -0.19 0.01 3000
## paternal_loss1015 -0.03 0.03 -0.09 0.03 3000
## paternal_loss1520 0.01 0.02 -0.04 0.06 3000
## paternal_loss2025 0.00 0.02 -0.04 0.03 3000
## paternal_loss2530 -0.02 0.02 -0.05 0.01 3000
## paternal_loss3035 -0.01 0.01 -0.04 0.02 3000
## paternal_loss3540 -0.01 0.01 -0.04 0.02 3000
## paternal_loss4045 -0.02 0.01 -0.05 0.00 3000
## paternal_lossunclear -0.10 0.01 -0.12 -0.08 3000
## maternal_loss01 -0.42 0.32 -1.04 0.21 3000
## maternal_loss15 -0.11 0.15 -0.41 0.19 3000
## maternal_loss510 0.03 0.07 -0.11 0.18 3000
## maternal_loss1015 -0.03 0.05 -0.12 0.06 3000
## maternal_loss1520 -0.01 0.03 -0.08 0.05 3000
## maternal_loss2025 0.04 0.03 -0.01 0.10 3000
## maternal_loss2530 0.00 0.02 -0.04 0.05 3000
## maternal_loss3035 0.00 0.02 -0.04 0.04 3000
## maternal_loss3540 0.01 0.02 -0.03 0.04 3000
## maternal_loss4045 0.00 0.02 -0.04 0.03 3000
## maternal_lossunclear -0.03 0.01 -0.05 -0.02 3000
## older_siblings1 0.02 0.01 -0.01 0.04 1095
## older_siblings2 0.02 0.02 -0.02 0.06 1048
## older_siblings3 0.04 0.03 -0.02 0.09 966
## older_siblings4 -0.02 0.04 -0.10 0.06 1191
## older_siblings5P -0.15 0.05 -0.25 -0.05 1071
## nr.siblings 0.08 0.01 0.07 0.09 1076
## last_born1 0.02 0.01 0.00 0.03 3000
## Rhat
## Intercept 1.00
## paternalage 1.01
## birth_cohort1950M1955 1.00
## birth_cohort1955M1960 1.00
## male1 1.00
## maternalage.factor1420 1.00
## maternalage.factor3561 1.00
## paternalage.mean 1.01
## paternal_loss01 1.00
## paternal_loss15 1.00
## paternal_loss510 1.00
## paternal_loss1015 1.00
## paternal_loss1520 1.00
## paternal_loss2025 1.00
## paternal_loss2530 1.00
## paternal_loss3035 1.00
## paternal_loss3540 1.00
## paternal_loss4045 1.00
## paternal_lossunclear 1.00
## maternal_loss01 1.00
## maternal_loss15 1.00
## maternal_loss510 1.00
## maternal_loss1015 1.00
## maternal_loss1520 1.00
## maternal_loss2025 1.00
## maternal_loss2530 1.00
## maternal_loss3035 1.00
## maternal_loss3540 1.00
## maternal_loss4045 1.00
## maternal_lossunclear 1.00
## older_siblings1 1.01
## older_siblings2 1.01
## older_siblings3 1.01
## older_siblings4 1.01
## older_siblings5P 1.01
## nr.siblings 1.01
## last_born1 1.00
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 1.19 0 1.19 1.2 757 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 7.88 | 7.467 | 8.309 |
paternalage | 0.9302 | 0.8873 | 0.9736 |
birth_cohort1950M1955 | 0.9914 | 0.9739 | 1.01 |
birth_cohort1955M1960 | 0.9962 | 0.9771 | 1.016 |
male1 | 0.8854 | 0.8733 | 0.8979 |
maternalage.factor1420 | 1.129 | 1.1 | 1.16 |
maternalage.factor3561 | 0.9883 | 0.9633 | 1.015 |
paternalage.mean | 0.9881 | 0.9419 | 1.036 |
paternal_loss01 | 1.331 | 0.6044 | 2.924 |
paternal_loss15 | 0.9932 | 0.8178 | 1.2 |
paternal_loss510 | 0.9106 | 0.8251 | 1.007 |
paternal_loss1015 | 0.968 | 0.9101 | 1.031 |
paternal_loss1520 | 1.011 | 0.9654 | 1.059 |
paternal_loss2025 | 0.9961 | 0.9599 | 1.036 |
paternal_loss2530 | 0.9794 | 0.9506 | 1.01 |
paternal_loss3035 | 0.9925 | 0.9653 | 1.022 |
paternal_loss3540 | 0.9908 | 0.9653 | 1.017 |
paternal_loss4045 | 0.9758 | 0.952 | 1.001 |
paternal_lossunclear | 0.9014 | 0.8836 | 0.9191 |
maternal_loss01 | 0.6597 | 0.3546 | 1.237 |
maternal_loss15 | 0.8944 | 0.6647 | 1.21 |
maternal_loss510 | 1.034 | 0.8945 | 1.199 |
maternal_loss1015 | 0.9663 | 0.8832 | 1.057 |
maternal_loss1520 | 0.9852 | 0.921 | 1.054 |
maternal_loss2025 | 1.043 | 0.9863 | 1.105 |
maternal_loss2530 | 1.002 | 0.9568 | 1.05 |
maternal_loss3035 | 1 | 0.9597 | 1.042 |
maternal_loss3540 | 1.009 | 0.9743 | 1.042 |
maternal_loss4045 | 0.995 | 0.9624 | 1.028 |
maternal_lossunclear | 0.9662 | 0.9496 | 0.9832 |
older_siblings1 | 1.017 | 0.9931 | 1.041 |
older_siblings2 | 1.021 | 0.9805 | 1.062 |
older_siblings3 | 1.038 | 0.9794 | 1.098 |
older_siblings4 | 0.98 | 0.9066 | 1.058 |
older_siblings5P | 0.8579 | 0.7757 | 0.9488 |
nr.siblings | 1.084 | 1.074 | 1.095 |
last_born1 | 1.017 | 1.002 | 1.032 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.93 | [1.9;1.96] | [1.91;1.95] |
estimate father 35y | 1.86 | [1.82;1.89] | [1.83;1.88] |
percentage change | -3.73 | [-6.14;-1.4] | [-5.33;-2.2] |
OR/IRR | 0.93 | [0.89;0.97] | [0.9;0.96] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r19_normal_distribution.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
In this model, we test what happens when we do not adjust for maternal age, because it is highly collinear with paternal age.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 7 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 1000; warmup = 500; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.03 249 1.03
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 0.77 0.01 0.74 0.80 3000 1
## paternalage -0.05 0.01 -0.08 -0.03 1132 1
## birth_cohort1950M1955 0.00 0.01 -0.01 0.01 3000 1
## birth_cohort1955M1960 0.00 0.01 -0.01 0.01 3000 1
## male1 -0.07 0.00 -0.07 -0.06 3000 1
## paternalage.mean 0.00 0.01 -0.03 0.02 930 1
## paternal_loss01 0.09 0.22 -0.34 0.49 3000 1
## paternal_loss15 -0.01 0.06 -0.13 0.11 3000 1
## paternal_loss510 -0.05 0.03 -0.11 0.01 3000 1
## paternal_loss1015 -0.01 0.02 -0.05 0.02 3000 1
## paternal_loss1520 0.01 0.01 -0.02 0.03 3000 1
## paternal_loss2025 0.00 0.01 -0.02 0.02 3000 1
## paternal_loss2530 -0.01 0.01 -0.03 0.01 3000 1
## paternal_loss3035 0.00 0.01 -0.02 0.01 3000 1
## paternal_loss3540 0.00 0.01 -0.02 0.01 3000 1
## paternal_loss4045 -0.01 0.01 -0.03 0.00 3000 1
## paternal_lossunclear -0.05 0.01 -0.07 -0.04 3000 1
## maternal_loss01 -0.23 0.20 -0.65 0.14 3000 1
## maternal_loss15 -0.06 0.09 -0.23 0.11 3000 1
## maternal_loss510 0.01 0.04 -0.07 0.09 3000 1
## maternal_loss1015 -0.02 0.03 -0.07 0.04 3000 1
## maternal_loss1520 -0.01 0.02 -0.05 0.03 3000 1
## maternal_loss2025 0.03 0.02 -0.01 0.06 3000 1
## maternal_loss2530 0.00 0.01 -0.03 0.03 3000 1
## maternal_loss3035 0.00 0.01 -0.02 0.02 3000 1
## maternal_loss3540 0.01 0.01 -0.01 0.02 3000 1
## maternal_loss4045 0.00 0.01 -0.02 0.02 3000 1
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 3000 1
## older_siblings1 0.01 0.01 -0.01 0.02 1443 1
## older_siblings2 0.01 0.01 -0.01 0.04 1225 1
## older_siblings3 0.02 0.02 -0.01 0.05 1216 1
## older_siblings4 -0.01 0.02 -0.05 0.04 1332 1
## older_siblings5P -0.07 0.03 -0.13 -0.02 1308 1
## nr.siblings 0.04 0.00 0.04 0.05 1249 1
## last_born1 0.01 0.00 0.00 0.02 3000 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.16 | 2.101 | 2.221 |
paternalage | 0.9471 | 0.9235 | 0.9702 |
birth_cohort1950M1955 | 0.9953 | 0.9855 | 1.005 |
birth_cohort1955M1960 | 0.9976 | 0.987 | 1.008 |
male1 | 0.9364 | 0.9288 | 0.9442 |
paternalage.mean | 0.998 | 0.9731 | 1.025 |
paternal_loss01 | 1.092 | 0.7092 | 1.629 |
paternal_loss15 | 0.9914 | 0.8791 | 1.114 |
paternal_loss510 | 0.953 | 0.9 | 1.009 |
paternal_loss1015 | 0.9854 | 0.951 | 1.021 |
paternal_loss1520 | 1.008 | 0.9806 | 1.035 |
paternal_loss2025 | 0.9984 | 0.9766 | 1.02 |
paternal_loss2530 | 0.9889 | 0.9716 | 1.006 |
paternal_loss3035 | 0.9966 | 0.9801 | 1.013 |
paternal_loss3540 | 0.9955 | 0.9808 | 1.01 |
paternal_loss4045 | 0.9871 | 0.973 | 1.002 |
paternal_lossunclear | 0.9469 | 0.9367 | 0.9573 |
maternal_loss01 | 0.791 | 0.5247 | 1.148 |
maternal_loss15 | 0.945 | 0.7955 | 1.12 |
maternal_loss510 | 1.012 | 0.9344 | 1.096 |
maternal_loss1015 | 0.9849 | 0.9297 | 1.04 |
maternal_loss1520 | 0.9927 | 0.9539 | 1.033 |
maternal_loss2025 | 1.026 | 0.9923 | 1.059 |
maternal_loss2530 | 1 | 0.9732 | 1.03 |
maternal_loss3035 | 1.002 | 0.9788 | 1.025 |
maternal_loss3540 | 1.005 | 0.9863 | 1.024 |
maternal_loss4045 | 0.9969 | 0.9792 | 1.015 |
maternal_lossunclear | 0.9843 | 0.9747 | 0.9943 |
older_siblings1 | 1.007 | 0.9945 | 1.02 |
older_siblings2 | 1.012 | 0.9906 | 1.036 |
older_siblings3 | 1.021 | 0.9895 | 1.054 |
older_siblings4 | 0.995 | 0.9539 | 1.038 |
older_siblings5P | 0.9293 | 0.8802 | 0.9812 |
nr.siblings | 1.042 | 1.036 | 1.047 |
last_born1 | 1.009 | 1 | 1.017 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.95 | [1.92;1.99] | [1.93;1.97] |
estimate father 35y | 1.85 | [1.81;1.88] | [1.83;1.87] |
percentage change | -5.28 | [-7.65;-2.98] | [-6.84;-3.73] |
OR/IRR | 0.95 | [0.92;0.97] | [0.93;0.96] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r20_no_maternalage_control.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
In this model, we adjust for maternal age using a continuous variable instead of three bins. This does not allow for nonlinear effects, but also does not aggregate the predictor. We cannot compare full siblings, test the effects of maternal and paternal age and adjust for average maternal and paternal age in the family (because the predictors are redundant), so that it is not perfectly possible to disentangle the contribution of maternal and paternal age and compare full siblings.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + maternalage + birth_cohort + male + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 1408212)
## Samples: 6 chains, each with iter = 1000; warmup = 500; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 885003)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0 0 0 0.01 182 1.04
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 0.82 0.00 0.82 0.83 3000 1
## paternalage -0.01 0.00 -0.02 0.00 2365 1
## maternalage -0.05 0.00 -0.05 -0.04 3000 1
## birth_cohort1950M1955 0.00 0.00 0.00 0.00 3000 1
## birth_cohort1955M1960 0.00 0.00 0.00 0.01 3000 1
## male1 -0.06 0.00 -0.07 -0.06 3000 1
## paternalage.mean -0.02 0.00 -0.03 -0.01 2282 1
## paternal_loss01 0.12 0.05 0.01 0.22 3000 1
## paternal_loss15 0.03 0.02 -0.01 0.07 3000 1
## paternal_loss510 -0.02 0.01 -0.03 0.00 3000 1
## paternal_loss1015 0.00 0.01 -0.01 0.01 3000 1
## paternal_loss1520 0.01 0.00 0.00 0.01 3000 1
## paternal_loss2025 0.00 0.00 -0.01 0.01 3000 1
## paternal_loss2530 0.00 0.00 0.00 0.01 3000 1
## paternal_loss3035 0.00 0.00 0.00 0.01 3000 1
## paternal_loss3540 0.00 0.00 -0.01 0.00 3000 1
## paternal_loss4045 -0.01 0.00 -0.01 0.00 3000 1
## paternal_lossunclear -0.05 0.00 -0.06 -0.05 3000 1
## maternal_loss01 -0.21 0.07 -0.35 -0.08 3000 1
## maternal_loss15 -0.06 0.03 -0.12 0.00 3000 1
## maternal_loss510 -0.01 0.01 -0.03 0.02 3000 1
## maternal_loss1015 0.00 0.01 -0.02 0.01 3000 1
## maternal_loss1520 0.00 0.01 -0.01 0.01 3000 1
## maternal_loss2025 0.01 0.01 0.00 0.02 3000 1
## maternal_loss2530 0.01 0.00 0.00 0.02 3000 1
## maternal_loss3035 0.01 0.00 0.00 0.02 3000 1
## maternal_loss3540 0.00 0.00 -0.01 0.01 3000 1
## maternal_loss4045 0.00 0.00 0.00 0.01 3000 1
## maternal_lossunclear -0.03 0.00 -0.03 -0.02 3000 1
## older_siblings1 0.01 0.00 0.01 0.01 3000 1
## older_siblings2 0.01 0.00 0.01 0.02 2383 1
## older_siblings3 0.01 0.01 0.00 0.02 2408 1
## older_siblings4 -0.01 0.01 -0.02 0.00 2425 1
## older_siblings5P -0.06 0.01 -0.08 -0.05 2174 1
## nr.siblings 0.04 0.00 0.04 0.04 2348 1
## last_born1 0.01 0.00 0.01 0.01 2971 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.281 | 2.259 | 2.301 |
paternalage | 0.9906 | 0.9826 | 0.9987 |
maternalage | 0.953 | 0.9498 | 0.9565 |
birth_cohort1950M1955 | 1.001 | 0.9978 | 1.004 |
birth_cohort1955M1960 | 1.002 | 0.9987 | 1.005 |
male1 | 0.9384 | 0.9361 | 0.9406 |
paternalage.mean | 0.9792 | 0.9712 | 0.987 |
paternal_loss01 | 1.122 | 1.007 | 1.243 |
paternal_loss15 | 1.031 | 0.9937 | 1.07 |
paternal_loss510 | 0.9831 | 0.9658 | 1 |
paternal_loss1015 | 0.9962 | 0.9861 | 1.006 |
paternal_loss1520 | 1.005 | 0.9974 | 1.014 |
paternal_loss2025 | 0.9992 | 0.9929 | 1.006 |
paternal_loss2530 | 1.004 | 0.9988 | 1.01 |
paternal_loss3035 | 1.001 | 0.9963 | 1.006 |
paternal_loss3540 | 0.9969 | 0.9925 | 1.001 |
paternal_loss4045 | 0.9921 | 0.9876 | 0.9965 |
paternal_lossunclear | 0.947 | 0.9438 | 0.9501 |
maternal_loss01 | 0.8112 | 0.7043 | 0.9265 |
maternal_loss15 | 0.944 | 0.8911 | 0.9981 |
maternal_loss510 | 0.9923 | 0.9675 | 1.017 |
maternal_loss1015 | 0.9978 | 0.9832 | 1.013 |
maternal_loss1520 | 0.9989 | 0.9871 | 1.011 |
maternal_loss2025 | 1.011 | 1.002 | 1.021 |
maternal_loss2530 | 1.009 | 1.001 | 1.017 |
maternal_loss3035 | 1.009 | 1.002 | 1.016 |
maternal_loss3540 | 0.9995 | 0.9936 | 1.005 |
maternal_loss4045 | 1.002 | 0.9967 | 1.008 |
maternal_lossunclear | 0.9744 | 0.9714 | 0.9774 |
older_siblings1 | 1.01 | 1.006 | 1.014 |
older_siblings2 | 1.014 | 1.007 | 1.02 |
older_siblings3 | 1.006 | 0.9964 | 1.016 |
older_siblings4 | 0.9904 | 0.9772 | 1.003 |
older_siblings5P | 0.9378 | 0.9219 | 0.9534 |
nr.siblings | 1.039 | 1.038 | 1.041 |
last_born1 | 1.009 | 1.006 | 1.012 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.9 | [1.89;1.91] | [1.89;1.9] |
estimate father 35y | 1.88 | [1.87;1.89] | [1.87;1.89] |
percentage change | -0.94 | [-1.74;-0.13] | [-1.49;-0.39] |
OR/IRR | 0.99 | [0.98;1] | [0.99;1] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r21_continuous_maternalage.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
Like r1, but we use a 30-years-later cutoff year for our birth cohorts, relaxing our censoring requirements.
To demonstrate the robustness of our prior choice we use Student’s t priors (fatter tails than normal priors) for our population-level effects and a half-Cauchy prior for our group-level effect for the family.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 2 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 800; warmup = 300; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ student_t(5,0,10)
## sd ~ cauchy(0,5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.02 313 1.01
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.73 0.01 0.70 0.76 3000
## paternalage -0.05 0.01 -0.08 -0.02 1350
## birth_cohort1950M1955 0.00 0.01 -0.02 0.01 3000
## birth_cohort1955M1960 0.00 0.01 -0.01 0.01 3000
## male1 -0.07 0.00 -0.07 -0.06 3000
## maternalage.factor1420 0.07 0.01 0.05 0.08 3000
## maternalage.factor3561 -0.01 0.01 -0.02 0.01 3000
## paternalage.mean 0.00 0.01 -0.03 0.03 1386
## paternal_loss01 0.09 0.23 -0.37 0.51 3000
## paternal_loss15 -0.02 0.06 -0.13 0.10 3000
## paternal_loss510 -0.05 0.03 -0.11 0.00 3000
## paternal_loss1015 -0.02 0.02 -0.05 0.02 3000
## paternal_loss1520 0.00 0.01 -0.02 0.03 3000
## paternal_loss2025 0.00 0.01 -0.03 0.02 3000
## paternal_loss2530 -0.01 0.01 -0.03 0.01 3000
## paternal_loss3035 0.00 0.01 -0.02 0.01 3000
## paternal_loss3540 -0.01 0.01 -0.02 0.01 3000
## paternal_loss4045 -0.01 0.01 -0.03 0.00 3000
## paternal_lossunclear -0.06 0.01 -0.07 -0.04 3000
## maternal_loss01 -0.26 0.21 -0.68 0.13 3000
## maternal_loss15 -0.06 0.10 -0.25 0.12 3000
## maternal_loss510 0.01 0.04 -0.07 0.10 3000
## maternal_loss1015 -0.01 0.03 -0.07 0.04 3000
## maternal_loss1520 -0.01 0.02 -0.05 0.03 3000
## maternal_loss2025 0.03 0.02 0.00 0.06 3000
## maternal_loss2530 0.00 0.01 -0.03 0.03 3000
## maternal_loss3035 0.00 0.01 -0.02 0.02 3000
## maternal_loss3540 0.00 0.01 -0.02 0.03 3000
## maternal_loss4045 0.00 0.01 -0.02 0.01 3000
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 3000
## older_siblings1 0.02 0.01 0.00 0.03 1513
## older_siblings2 0.02 0.01 0.00 0.05 1406
## older_siblings3 0.03 0.02 0.00 0.07 1562
## older_siblings4 0.01 0.02 -0.04 0.05 1498
## older_siblings5P -0.06 0.03 -0.11 0.00 1482
## nr.siblings 0.04 0.00 0.03 0.04 1568
## last_born1 0.01 0.00 0.00 0.02 3000
## Rhat
## Intercept 1
## paternalage 1
## birth_cohort1950M1955 1
## birth_cohort1955M1960 1
## male1 1
## maternalage.factor1420 1
## maternalage.factor3561 1
## paternalage.mean 1
## paternal_loss01 1
## paternal_loss15 1
## paternal_loss510 1
## paternal_loss1015 1
## paternal_loss1520 1
## paternal_loss2025 1
## paternal_loss2530 1
## paternal_loss3035 1
## paternal_loss3540 1
## paternal_loss4045 1
## paternal_lossunclear 1
## maternal_loss01 1
## maternal_loss15 1
## maternal_loss510 1
## maternal_loss1015 1
## maternal_loss1520 1
## maternal_loss2025 1
## maternal_loss2530 1
## maternal_loss3035 1
## maternal_loss3540 1
## maternal_loss4045 1
## maternal_lossunclear 1
## older_siblings1 1
## older_siblings2 1
## older_siblings3 1
## older_siblings4 1
## older_siblings5P 1
## nr.siblings 1
## last_born1 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.076 | 2.016 | 2.136 |
paternalage | 0.9536 | 0.9274 | 0.9797 |
birth_cohort1950M1955 | 0.9952 | 0.9847 | 1.006 |
birth_cohort1955M1960 | 0.9976 | 0.9863 | 1.009 |
male1 | 0.9363 | 0.9286 | 0.9439 |
maternalage.factor1420 | 1.071 | 1.055 | 1.087 |
maternalage.factor3561 | 0.9937 | 0.9789 | 1.008 |
paternalage.mean | 1.002 | 0.9752 | 1.03 |
paternal_loss01 | 1.095 | 0.6913 | 1.673 |
paternal_loss15 | 0.9838 | 0.8748 | 1.103 |
paternal_loss510 | 0.9498 | 0.898 | 1.005 |
paternal_loss1015 | 0.9822 | 0.9482 | 1.016 |
paternal_loss1520 | 1.004 | 0.9778 | 1.031 |
paternal_loss2025 | 0.9963 | 0.974 | 1.019 |
paternal_loss2530 | 0.9869 | 0.9694 | 1.005 |
paternal_loss3035 | 0.9952 | 0.979 | 1.011 |
paternal_loss3540 | 0.9943 | 0.9795 | 1.009 |
paternal_loss4045 | 0.9865 | 0.9714 | 1.001 |
paternal_lossunclear | 0.9459 | 0.9349 | 0.9565 |
maternal_loss01 | 0.7712 | 0.5081 | 1.144 |
maternal_loss15 | 0.9377 | 0.7773 | 1.125 |
maternal_loss510 | 1.012 | 0.9278 | 1.105 |
maternal_loss1015 | 0.9851 | 0.9349 | 1.039 |
maternal_loss1520 | 0.9921 | 0.9528 | 1.033 |
maternal_loss2025 | 1.026 | 0.9951 | 1.058 |
maternal_loss2530 | 1.001 | 0.9739 | 1.029 |
maternal_loss3035 | 1.001 | 0.978 | 1.025 |
maternal_loss3540 | 1.005 | 0.984 | 1.026 |
maternal_loss4045 | 0.9961 | 0.9781 | 1.014 |
maternal_lossunclear | 0.9818 | 0.9719 | 0.9919 |
older_siblings1 | 1.017 | 1.003 | 1.03 |
older_siblings2 | 1.024 | 1.001 | 1.049 |
older_siblings3 | 1.034 | 1.001 | 1.069 |
older_siblings4 | 1.008 | 0.9654 | 1.056 |
older_siblings5P | 0.9443 | 0.8928 | 1.001 |
nr.siblings | 1.04 | 1.034 | 1.045 |
last_born1 | 1.011 | 1.002 | 1.02 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.93 | [1.89;1.96] | [1.91;1.95] |
estimate father 35y | 1.84 | [1.8;1.88] | [1.81;1.86] |
percentage change | -4.6 | [-7.26;-2.03] | [-6.4;-2.97] |
OR/IRR | 0.95 | [0.93;0.98] | [0.94;0.97] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r23_student_cauchy_priors.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
To demonstrate the robustness of our prior choice we use improper flat priors. These priors should make the model’s results comparable to a frequentist maximum likelihood approach.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 1 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 800; warmup = 300; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## sd ~ student_t(3, 0, 10)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.02 200 1.03
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.73 0.02 0.70 0.76 3000
## paternalage -0.05 0.01 -0.07 -0.02 990
## birth_cohort1950M1955 0.00 0.01 -0.02 0.01 3000
## birth_cohort1955M1960 0.00 0.01 -0.01 0.01 3000
## male1 -0.07 0.00 -0.07 -0.06 3000
## maternalage.factor1420 0.07 0.01 0.05 0.08 3000
## maternalage.factor3561 -0.01 0.01 -0.02 0.01 3000
## paternalage.mean 0.00 0.01 -0.03 0.03 1053
## paternal_loss01 0.09 0.22 -0.35 0.49 3000
## paternal_loss15 -0.02 0.06 -0.13 0.10 3000
## paternal_loss510 -0.05 0.03 -0.11 0.00 3000
## paternal_loss1015 -0.02 0.02 -0.05 0.02 3000
## paternal_loss1520 0.00 0.01 -0.02 0.03 3000
## paternal_loss2025 0.00 0.01 -0.03 0.02 3000
## paternal_loss2530 -0.01 0.01 -0.03 0.00 3000
## paternal_loss3035 0.00 0.01 -0.02 0.01 3000
## paternal_loss3540 -0.01 0.01 -0.02 0.01 3000
## paternal_loss4045 -0.01 0.01 -0.03 0.00 3000
## paternal_lossunclear -0.06 0.01 -0.07 -0.04 3000
## maternal_loss01 -0.26 0.21 -0.69 0.12 3000
## maternal_loss15 -0.07 0.09 -0.25 0.12 3000
## maternal_loss510 0.01 0.04 -0.07 0.09 3000
## maternal_loss1015 -0.02 0.03 -0.06 0.03 3000
## maternal_loss1520 -0.01 0.02 -0.05 0.03 3000
## maternal_loss2025 0.02 0.02 -0.01 0.06 3000
## maternal_loss2530 0.00 0.01 -0.03 0.03 3000
## maternal_loss3035 0.00 0.01 -0.02 0.02 3000
## maternal_loss3540 0.00 0.01 -0.01 0.02 3000
## maternal_loss4045 0.00 0.01 -0.02 0.01 3000
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 3000
## older_siblings1 0.02 0.01 0.00 0.03 1251
## older_siblings2 0.02 0.01 0.00 0.05 1086
## older_siblings3 0.03 0.02 0.00 0.07 1097
## older_siblings4 0.01 0.02 -0.04 0.05 1207
## older_siblings5P -0.06 0.03 -0.11 0.00 1142
## nr.siblings 0.04 0.00 0.03 0.04 1117
## last_born1 0.01 0.00 0.00 0.02 3000
## Rhat
## Intercept 1
## paternalage 1
## birth_cohort1950M1955 1
## birth_cohort1955M1960 1
## male1 1
## maternalage.factor1420 1
## maternalage.factor3561 1
## paternalage.mean 1
## paternal_loss01 1
## paternal_loss15 1
## paternal_loss510 1
## paternal_loss1015 1
## paternal_loss1520 1
## paternal_loss2025 1
## paternal_loss2530 1
## paternal_loss3035 1
## paternal_loss3540 1
## paternal_loss4045 1
## paternal_lossunclear 1
## maternal_loss01 1
## maternal_loss15 1
## maternal_loss510 1
## maternal_loss1015 1
## maternal_loss1520 1
## maternal_loss2025 1
## maternal_loss2530 1
## maternal_loss3035 1
## maternal_loss3540 1
## maternal_loss4045 1
## maternal_lossunclear 1
## older_siblings1 1
## older_siblings2 1
## older_siblings3 1
## older_siblings4 1
## older_siblings5P 1
## nr.siblings 1
## last_born1 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.076 | 2.015 | 2.138 |
paternalage | 0.9531 | 0.9287 | 0.9793 |
birth_cohort1950M1955 | 0.9952 | 0.985 | 1.006 |
birth_cohort1955M1960 | 0.9976 | 0.9867 | 1.009 |
male1 | 0.9365 | 0.929 | 0.9438 |
maternalage.factor1420 | 1.071 | 1.054 | 1.086 |
maternalage.factor3561 | 0.9936 | 0.9785 | 1.009 |
paternalage.mean | 1.002 | 0.9753 | 1.028 |
paternal_loss01 | 1.097 | 0.7049 | 1.637 |
paternal_loss15 | 0.9829 | 0.8744 | 1.102 |
paternal_loss510 | 0.9489 | 0.8977 | 1.003 |
paternal_loss1015 | 0.9823 | 0.9482 | 1.018 |
paternal_loss1520 | 1.005 | 0.9777 | 1.032 |
paternal_loss2025 | 0.9957 | 0.9742 | 1.018 |
paternal_loss2530 | 0.9869 | 0.9696 | 1.005 |
paternal_loss3035 | 0.9952 | 0.979 | 1.012 |
paternal_loss3540 | 0.9944 | 0.9795 | 1.01 |
paternal_loss4045 | 0.9864 | 0.9717 | 1.001 |
paternal_lossunclear | 0.9458 | 0.9353 | 0.9568 |
maternal_loss01 | 0.7722 | 0.5028 | 1.132 |
maternal_loss15 | 0.9334 | 0.7799 | 1.122 |
maternal_loss510 | 1.011 | 0.9287 | 1.099 |
maternal_loss1015 | 0.9839 | 0.9372 | 1.034 |
maternal_loss1520 | 0.9919 | 0.9531 | 1.034 |
maternal_loss2025 | 1.025 | 0.9901 | 1.061 |
maternal_loss2530 | 1.001 | 0.9717 | 1.029 |
maternal_loss3035 | 1.001 | 0.9776 | 1.025 |
maternal_loss3540 | 1.005 | 0.9852 | 1.024 |
maternal_loss4045 | 0.9962 | 0.9774 | 1.015 |
maternal_lossunclear | 0.9817 | 0.9716 | 0.9915 |
older_siblings1 | 1.017 | 1.002 | 1.03 |
older_siblings2 | 1.024 | 1.001 | 1.047 |
older_siblings3 | 1.034 | 1.001 | 1.068 |
older_siblings4 | 1.009 | 0.9651 | 1.054 |
older_siblings5P | 0.945 | 0.8926 | 0.9985 |
nr.siblings | 1.04 | 1.035 | 1.046 |
last_born1 | 1.011 | 1.002 | 1.019 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.93 | [1.9;1.96] | [1.91;1.95] |
estimate father 35y | 1.84 | [1.8;1.88] | [1.81;1.86] |
percentage change | -4.71 | [-7.13;-2.07] | [-6.32;-2.97] |
OR/IRR | 0.95 | [0.93;0.98] | [0.94;0.97] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r24_uniform_priors.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
In the three historical populations, records were kept in the parish. Although records were linked between parishes in all populations, except three out of four provinces in historical Sweden, migration might sometimes lead to censoring of records. Adjusting for migration may however constitute a partial adjustment for the outcome, as lower offspring fitness might make them more likely to migrate. Hence, we show the results of doing so as a robustness analysis. In all analyses, we adjusted for a “migrated”-dummy variable. Migration was differently defined depending on the population. In Québec, we had flags denoting immigrants and emigrants. Few immigrants were included in our analyses anyway, as we needed parental information for our analyses. Emigrants were people who left Québec. In historical Sweden, migration was logged as migration from the parish of birth. In the Krummhörn, we set migrated to true, when the parish of death/burial differed from the parish of birth/baptism.
No migration information was available in 20th-century Sweden, but records there weren’t kept in parishes, so this should not pose a problem.
In this model, we adjust for maternal age using a continuous variable. We also adjust for a dummy variable for teenage motherhood, to account for the nonlinearity of the maternal age effect. Moreover, we use separate random intercepts for mothers and fathers and adjust for the mother’s mean age at birth and the father’s mean age at birth. This model only converges in the 20th-century Sweden data, because there are sufficient numbers of divorces and remarriages and enough data to separate the parents’ contributions.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + maternalage + young_mother + paternalage.mean + birth_cohort + male + maternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idPere) + (1 | idMere)
## Data: model_data (Number of observations: 1408212)
## Samples: 6 chains, each with iter = 800; warmup = 300; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idMere (Number of levels: 861310)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0 0 0 0.01 343 1.01
##
## ~idPere (Number of levels: 859618)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0 0 0 0.01 199 1.02
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 0.79 0.00 0.78 0.80 3000 1
## paternalage -0.02 0.01 -0.04 0.00 2937 1
## maternalage -0.05 0.01 -0.06 -0.03 2929 1
## young_mother 0.05 0.00 0.04 0.05 2950 1
## paternalage.mean -0.01 0.01 -0.03 0.01 2928 1
## birth_cohort1950M1955 0.00 0.00 0.00 0.00 3000 1
## birth_cohort1955M1960 0.00 0.00 0.00 0.01 3000 1
## male1 -0.06 0.00 -0.07 -0.06 2713 1
## maternalage.mean 0.01 0.01 -0.01 0.02 2927 1
## paternal_loss01 0.11 0.05 0.01 0.22 3000 1
## paternal_loss15 0.03 0.02 -0.01 0.07 3000 1
## paternal_loss510 -0.02 0.01 -0.03 0.00 3000 1
## paternal_loss1015 0.00 0.01 -0.02 0.01 3000 1
## paternal_loss1520 0.00 0.00 0.00 0.01 3000 1
## paternal_loss2025 0.00 0.00 -0.01 0.00 3000 1
## paternal_loss2530 0.00 0.00 0.00 0.01 3000 1
## paternal_loss3035 0.00 0.00 0.00 0.01 3000 1
## paternal_loss3540 0.00 0.00 -0.01 0.00 3000 1
## paternal_loss4045 -0.01 0.00 -0.01 0.00 3000 1
## paternal_lossunclear -0.05 0.00 -0.06 -0.05 2973 1
## maternal_loss01 -0.21 0.07 -0.35 -0.07 3000 1
## maternal_loss15 -0.06 0.03 -0.12 0.00 3000 1
## maternal_loss510 -0.01 0.01 -0.03 0.02 3000 1
## maternal_loss1015 0.00 0.01 -0.02 0.01 3000 1
## maternal_loss1520 0.00 0.01 -0.01 0.01 3000 1
## maternal_loss2025 0.01 0.01 0.00 0.02 3000 1
## maternal_loss2530 0.01 0.00 0.00 0.02 3000 1
## maternal_loss3035 0.01 0.00 0.00 0.02 2971 1
## maternal_loss3540 0.00 0.00 -0.01 0.00 2995 1
## maternal_loss4045 0.00 0.00 0.00 0.01 3000 1
## maternal_lossunclear -0.03 0.00 -0.03 -0.02 3000 1
## older_siblings1 0.02 0.00 0.02 0.02 3000 1
## older_siblings2 0.03 0.00 0.02 0.03 3000 1
## older_siblings3 0.02 0.00 0.02 0.03 3000 1
## older_siblings4 0.01 0.01 0.00 0.02 3000 1
## older_siblings5P -0.04 0.01 -0.05 -0.02 3000 1
## nr.siblings 0.04 0.00 0.03 0.04 3000 1
## last_born1 0.01 0.00 0.01 0.01 3000 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 2.211 | 2.191 | 2.231 |
paternalage | 0.9779 | 0.9612 | 0.9956 |
maternalage | 0.9557 | 0.9384 | 0.9729 |
young_mother | 1.046 | 1.041 | 1.052 |
paternalage.mean | 0.9922 | 0.9745 | 1.009 |
birth_cohort1950M1955 | 1.001 | 0.9976 | 1.004 |
birth_cohort1955M1960 | 1.002 | 0.9977 | 1.006 |
male1 | 0.9384 | 0.9361 | 0.9407 |
maternalage.mean | 1.007 | 0.989 | 1.025 |
paternal_loss01 | 1.121 | 1.01 | 1.242 |
paternal_loss15 | 1.03 | 0.9924 | 1.068 |
paternal_loss510 | 0.9825 | 0.966 | 0.9999 |
paternal_loss1015 | 0.9953 | 0.9849 | 1.006 |
paternal_loss1520 | 1.004 | 0.997 | 1.012 |
paternal_loss2025 | 0.9984 | 0.992 | 1.005 |
paternal_loss2530 | 1.004 | 0.9981 | 1.009 |
paternal_loss3035 | 1.001 | 0.9956 | 1.006 |
paternal_loss3540 | 0.9964 | 0.9921 | 1.001 |
paternal_loss4045 | 0.9918 | 0.9873 | 0.9962 |
paternal_lossunclear | 0.9465 | 0.9434 | 0.9497 |
maternal_loss01 | 0.8105 | 0.7019 | 0.9313 |
maternal_loss15 | 0.9438 | 0.8888 | 1.002 |
maternal_loss510 | 0.9918 | 0.9662 | 1.018 |
maternal_loss1015 | 0.9971 | 0.981 | 1.013 |
maternal_loss1520 | 0.9982 | 0.9866 | 1.01 |
maternal_loss2025 | 1.011 | 1.001 | 1.021 |
maternal_loss2530 | 1.008 | 1 | 1.017 |
maternal_loss3035 | 1.009 | 1.002 | 1.016 |
maternal_loss3540 | 0.9988 | 0.9929 | 1.005 |
maternal_loss4045 | 1.002 | 0.9963 | 1.007 |
maternal_lossunclear | 0.9747 | 0.972 | 0.9775 |
older_siblings1 | 1.019 | 1.015 | 1.022 |
older_siblings2 | 1.028 | 1.022 | 1.033 |
older_siblings3 | 1.024 | 1.016 | 1.032 |
older_siblings4 | 1.012 | 1.001 | 1.023 |
older_siblings5P | 0.964 | 0.952 | 0.9769 |
nr.siblings | 1.037 | 1.035 | 1.038 |
last_born1 | 1.009 | 1.007 | 1.012 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.9 | [1.88;1.92] | [1.89;1.92] |
estimate father 35y | 1.86 | [1.84;1.88] | [1.85;1.87] |
percentage change | -2.22 | [-3.88;-0.44] | [-3.33;-1.11] |
OR/IRR | 0.98 | [0.96;1] | [0.97;0.99] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/r26_separate_parental_age_contributions.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
Here we show the effect of paternal age for each episode.
In reference to m3, the main reported model, the robustness models were implemented as follows: r1 relaxed exclusion criteria (not in 20th-century Sweden), r2 had only birth cohort as a covariate, r3 adjusted for birth order as a continuous variable, r4 adjusted for number of dependent siblings instead of birth order, r5 interacted birth order with number of siblings, r6 did not adjust for birth order, r7 adjusted only for parental loss in the first 5 years, r8 adjusted for being the first-/last-born adult son, r9 adjusted for a continuous nonlinear thin-splate spline for birth year instead of 5-year bins, r10 added a group-level slope for paternal age, r11 included separate group-level effects for each parent instead of one per marriage, r12 added a moderation by anchor sex, r13 adjusted for paternal age at first birth, r14 compared a model with linear group fixed effects, r15 added a moderator by region and group-level effects by church parish (not in 20th-century Sweden), r16 was restricted to Skellefteå (only in historical Sweden), r17 simulated Down syndrome cases, r18 reversed hurdle Poisson and Poisson distribution for the respective populations, r19 used a normal distribution, r20 did not adjust for maternal age, r21 adjusted for maternal age as a continuous variable, r22 relaxed exclusion criteria and included 30 more years of birth cohorts, allowing for more potential censoring, r23 used Student’s t distributions for population-level priors and half-Cauchy priors for the family variance component, r24 used noninformative priors, which should lead to results comparable with maximum likelihood, r25 controlled for migration status (not in 20th-century Sweden), r26 separate parental age contributions (only in 20th-century Sweden).
Points show median estimates, the lines show 80% and 95% credibility intervals respectively.