source("0__helpers.R")
opts_chunk$set(warning=TRUE, cache=F,cache.lazy=F,tidy=FALSE,autodep=TRUE,dev=c('png','pdf'),fig.width=20,fig.height=12.5,out.width='1440px',out.height='900px',cache.extra=file.info('swed1.rdata')[, 'mtime'])
make_path = function(file) {
get_coefficient_path(file, "swed")
}
# options for each chunk calling knit_child
opts_chunk$set(warning=FALSE, message = FALSE, echo = FALSE)
The swed_subset_children.1
dataset is based on the full dataset of all participants where paternal age is known and birth years are from 1947 to 1959. The subset contains 88707 randomly drawn participants from 56679 families.
All of the following models are the same as our main model m3, except for the noted changes to test robustness.
Here, we tested whether the effect on reproductive success is mediated by age (mortality). Unfortunately for the cohort we use for computing an effect on number of children, we have both left and right censoring (because the death register was only established in 1962, so some people may have died before then, and because many people born in 1958 are still alive). So we can only include people who died after their 11th-15th birthday, and have to enter an unknown age for those who have not died before their 51st-63rd birthday.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
## Warning: There were 1 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See http://mc-stan.org/misc/
## warnings.html#divergent-transitions-after-warmup
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + age + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 127284)
## Samples: 6 chains, each with iter = 1000; warmup = 500; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 80000)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.01 0.01 0 0.02 517 1
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.41 0.02 0.36 0.45 3000
## paternalage -0.05 0.01 -0.08 -0.02 1281
## age5063 0.19 0.02 0.15 0.23 3000
## age025 -2.94 0.14 -3.22 -2.67 3000
## ageunknown 0.34 0.02 0.31 0.37 3000
## birth_cohort1950M1955 -0.01 0.01 -0.02 0.00 3000
## birth_cohort1955M1960 0.00 0.01 -0.01 0.01 3000
## male1 -0.06 0.00 -0.07 -0.05 3000
## maternalage.factor1420 0.07 0.01 0.06 0.09 3000
## maternalage.factor3561 -0.01 0.01 -0.02 0.01 3000
## paternalage.mean 0.00 0.01 -0.02 0.03 1290
## paternal_loss01 0.07 0.22 -0.38 0.47 3000
## paternal_loss15 -0.01 0.06 -0.13 0.10 3000
## paternal_loss510 -0.05 0.03 -0.11 0.01 3000
## paternal_loss1015 -0.02 0.02 -0.06 0.01 3000
## paternal_loss1520 0.00 0.01 -0.02 0.03 3000
## paternal_loss2025 0.00 0.01 -0.03 0.02 3000
## paternal_loss2530 -0.02 0.01 -0.03 0.00 3000
## paternal_loss3035 -0.01 0.01 -0.02 0.01 3000
## paternal_loss3540 -0.01 0.01 -0.02 0.01 3000
## paternal_loss4045 -0.01 0.01 -0.03 0.00 3000
## paternal_lossunclear -0.06 0.01 -0.07 -0.05 3000
## maternal_loss01 -0.19 0.21 -0.61 0.19 3000
## maternal_loss15 -0.03 0.09 -0.22 0.15 3000
## maternal_loss510 0.03 0.04 -0.06 0.11 3000
## maternal_loss1015 -0.01 0.03 -0.07 0.04 3000
## maternal_loss1520 -0.01 0.02 -0.04 0.03 3000
## maternal_loss2025 0.02 0.02 -0.01 0.05 3000
## maternal_loss2530 0.00 0.01 -0.03 0.03 3000
## maternal_loss3035 0.00 0.01 -0.02 0.02 3000
## maternal_loss3540 0.00 0.01 -0.02 0.02 3000
## maternal_loss4045 0.00 0.01 -0.02 0.02 3000
## maternal_lossunclear -0.02 0.01 -0.03 -0.01 3000
## older_siblings1 0.02 0.01 0.00 0.03 1523
## older_siblings2 0.03 0.01 0.00 0.05 1369
## older_siblings3 0.03 0.02 0.00 0.07 1265
## older_siblings4 0.01 0.02 -0.03 0.06 1427
## older_siblings5P -0.06 0.03 -0.12 0.00 1258
## nr.siblings 0.04 0.00 0.03 0.04 1357
## last_born1 0.01 0.00 0.00 0.02 3000
## Rhat
## Intercept 1
## paternalage 1
## age5063 1
## age025 1
## ageunknown 1
## birth_cohort1950M1955 1
## birth_cohort1955M1960 1
## male1 1
## maternalage.factor1420 1
## maternalage.factor3561 1
## paternalage.mean 1
## paternal_loss01 1
## paternal_loss15 1
## paternal_loss510 1
## paternal_loss1015 1
## paternal_loss1520 1
## paternal_loss2025 1
## paternal_loss2530 1
## paternal_loss3035 1
## paternal_loss3540 1
## paternal_loss4045 1
## paternal_lossunclear 1
## maternal_loss01 1
## maternal_loss15 1
## maternal_loss510 1
## maternal_loss1015 1
## maternal_loss1520 1
## maternal_loss2025 1
## maternal_loss2530 1
## maternal_loss3035 1
## maternal_loss3540 1
## maternal_loss4045 1
## maternal_lossunclear 1
## older_siblings1 1
## older_siblings2 1
## older_siblings3 1
## older_siblings4 1
## older_siblings5P 1
## nr.siblings 1
## last_born1 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 1.5 | 1.439 | 1.562 |
paternalage | 0.9516 | 0.926 | 0.9772 |
age5063 | 1.212 | 1.164 | 1.263 |
age025 | 0.05311 | 0.04001 | 0.06917 |
ageunknown | 1.409 | 1.367 | 1.454 |
birth_cohort1950M1955 | 0.9936 | 0.9829 | 1.004 |
birth_cohort1955M1960 | 0.9969 | 0.9858 | 1.008 |
male1 | 0.9444 | 0.9369 | 0.952 |
maternalage.factor1420 | 1.074 | 1.058 | 1.09 |
maternalage.factor3561 | 0.9922 | 0.9775 | 1.007 |
paternalage.mean | 1.003 | 0.9757 | 1.03 |
paternal_loss01 | 1.075 | 0.6867 | 1.603 |
paternal_loss15 | 0.9884 | 0.8753 | 1.111 |
paternal_loss510 | 0.9528 | 0.8998 | 1.009 |
paternal_loss1015 | 0.979 | 0.9454 | 1.014 |
paternal_loss1520 | 1.003 | 0.9761 | 1.031 |
paternal_loss2025 | 0.9959 | 0.9753 | 1.017 |
paternal_loss2530 | 0.9848 | 0.9672 | 1.003 |
paternal_loss3035 | 0.9933 | 0.9766 | 1.009 |
paternal_loss3540 | 0.9933 | 0.9787 | 1.008 |
paternal_loss4045 | 0.9883 | 0.9735 | 1.003 |
paternal_lossunclear | 0.9443 | 0.9337 | 0.9552 |
maternal_loss01 | 0.829 | 0.5418 | 1.208 |
maternal_loss15 | 0.9694 | 0.8032 | 1.164 |
maternal_loss510 | 1.029 | 0.9465 | 1.114 |
maternal_loss1015 | 0.9861 | 0.9352 | 1.042 |
maternal_loss1520 | 0.9948 | 0.9573 | 1.032 |
maternal_loss2025 | 1.023 | 0.99 | 1.056 |
maternal_loss2530 | 1 | 0.9727 | 1.029 |
maternal_loss3035 | 1 | 0.9763 | 1.025 |
maternal_loss3540 | 1.003 | 0.9842 | 1.023 |
maternal_loss4045 | 0.9963 | 0.9778 | 1.016 |
maternal_lossunclear | 0.9789 | 0.9687 | 0.9887 |
older_siblings1 | 1.017 | 1.003 | 1.03 |
older_siblings2 | 1.026 | 1.003 | 1.049 |
older_siblings3 | 1.034 | 1.001 | 1.07 |
older_siblings4 | 1.011 | 0.968 | 1.058 |
older_siblings5P | 0.9442 | 0.8909 | 0.9997 |
nr.siblings | 1.04 | 1.035 | 1.046 |
last_born1 | 1.01 | 1.001 | 1.019 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 1.39 | [1.34;1.44] | [1.36;1.42] |
estimate father 35y | 1.32 | [1.28;1.37] | [1.29;1.35] |
percentage change | -4.87 | [-7.4;-2.28] | [-6.48;-3.13] |
OR/IRR | 0.95 | [0.93;0.98] | [0.94;0.97] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/s1_control_age.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")
Here, we tested whether the paternal age effect on reproductive succes is mediated by reproductive timing (as indexed by anchors’ ages at first and last birth). Because age at first and last birth are by definition only available for anchors who had at least one child, this analysis has to be restricted to such anchors. Hence, paternal age effects on mortality until age 1 and 15 cannot, in principle, be mediated by reproductive timing of the anchors.
model_summary = summary(model, use_cache = FALSE, priors = TRUE)
print(model_summary)
## Family: poisson(log)
## Formula: children ~ paternalage + age_at_1st_child + age_at_last_child + birth_cohort + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
## Data: model_data (Number of observations: 101944)
## Samples: 6 chains, each with iter = 800; warmup = 300; thin = 1;
## total post-warmup samples = 3000
## WAIC: Not computed
##
## Priors:
## b ~ normal(0,5)
## sd ~ student_t(3, 0, 5)
##
## Group-Level Effects:
## ~idParents (Number of levels: 68647)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0 0 0 0.01 3000 1
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 0.47 0.02 0.44 0.51 3000
## paternalage 0.00 0.01 -0.03 0.02 1003
## age_at_1st_child -0.54 0.00 -0.55 -0.53 3000
## age_at_last_child 0.51 0.00 0.51 0.52 3000
## birth_cohort1950M1955 0.02 0.01 0.01 0.03 3000
## birth_cohort1955M1960 0.06 0.01 0.05 0.07 3000
## male1 0.00 0.00 -0.01 0.01 3000
## maternalage.factor1420 -0.01 0.01 -0.03 0.00 3000
## maternalage.factor3561 0.01 0.01 0.00 0.02 3000
## paternalage.mean 0.02 0.01 0.00 0.05 992
## paternal_loss01 -0.14 0.21 -0.56 0.26 3000
## paternal_loss15 -0.03 0.06 -0.15 0.09 3000
## paternal_loss510 -0.02 0.03 -0.08 0.04 3000
## paternal_loss1015 -0.02 0.02 -0.05 0.02 3000
## paternal_loss1520 -0.02 0.01 -0.05 0.01 3000
## paternal_loss2025 -0.02 0.01 -0.04 0.00 3000
## paternal_loss2530 -0.01 0.01 -0.03 0.00 3000
## paternal_loss3035 -0.01 0.01 -0.03 0.01 3000
## paternal_loss3540 -0.01 0.01 -0.03 0.00 3000
## paternal_loss4045 -0.01 0.01 -0.02 0.01 3000
## paternal_lossunclear 0.01 0.01 0.00 0.02 3000
## maternal_loss01 -0.06 0.22 -0.52 0.36 3000
## maternal_loss15 0.02 0.10 -0.17 0.20 3000
## maternal_loss510 0.01 0.04 -0.07 0.09 3000
## maternal_loss1015 0.00 0.03 -0.05 0.06 3000
## maternal_loss1520 -0.01 0.02 -0.05 0.03 3000
## maternal_loss2025 0.00 0.02 -0.04 0.03 3000
## maternal_loss2530 -0.01 0.01 -0.03 0.02 3000
## maternal_loss3035 0.00 0.01 -0.02 0.03 3000
## maternal_loss3540 -0.01 0.01 -0.03 0.01 3000
## maternal_loss4045 0.00 0.01 -0.02 0.01 3000
## maternal_lossunclear 0.01 0.01 0.00 0.02 3000
## older_siblings1 0.00 0.01 -0.01 0.01 1188
## older_siblings2 0.00 0.01 -0.02 0.02 1113
## older_siblings3 0.00 0.02 -0.03 0.03 913
## older_siblings4 -0.01 0.02 -0.06 0.03 1258
## older_siblings5P -0.03 0.03 -0.09 0.02 1110
## nr.siblings 0.01 0.00 0.00 0.02 1180
## last_born1 0.00 0.00 -0.01 0.01 3000
## Rhat
## Intercept 1
## paternalage 1
## age_at_1st_child 1
## age_at_last_child 1
## birth_cohort1950M1955 1
## birth_cohort1955M1960 1
## male1 1
## maternalage.factor1420 1
## maternalage.factor3561 1
## paternalage.mean 1
## paternal_loss01 1
## paternal_loss15 1
## paternal_loss510 1
## paternal_loss1015 1
## paternal_loss1520 1
## paternal_loss2025 1
## paternal_loss2530 1
## paternal_loss3035 1
## paternal_loss3540 1
## paternal_loss4045 1
## paternal_lossunclear 1
## maternal_loss01 1
## maternal_loss15 1
## maternal_loss510 1
## maternal_loss1015 1
## maternal_loss1520 1
## maternal_loss2025 1
## maternal_loss2530 1
## maternal_loss3035 1
## maternal_loss3540 1
## maternal_loss4045 1
## maternal_lossunclear 1
## older_siblings1 1
## older_siblings2 1
## older_siblings3 1
## older_siblings4 1
## older_siblings5P 1
## nr.siblings 1
## last_born1 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Estimates are exp(b)
. When they are referring to the hurdle (hu) component, or a dichotomous outcome, they are odds ratios, when they are referring to a Poisson component, they are hazard ratios. In both cases, they are presented with 95% credibility intervals. To see the effects on the response scale (probability or number of children), consult the marginal effect plots.
fixed_eff = data.frame(model_summary$fixed, check.names = F)
fixed_eff$Est.Error = fixed_eff$Eff.Sample = fixed_eff$Rhat = NULL
fixed_eff$`Odds/hazard ratio` = exp(fixed_eff$Estimate)
fixed_eff$`OR/HR low 95%` = exp(fixed_eff$`l-95% CI`)
fixed_eff$`OR/HR high 95%` = exp(fixed_eff$`u-95% CI`)
fixed_eff = fixed_eff %>% select(`Odds/hazard ratio`, `OR/HR low 95%`, `OR/HR high 95%`)
pander::pander(fixed_eff)
Odds/hazard ratio | OR/HR low 95% | OR/HR high 95% | |
---|---|---|---|
Intercept | 1.607 | 1.549 | 1.666 |
paternalage | 0.9963 | 0.9703 | 1.023 |
age_at_1st_child | 0.5841 | 0.5791 | 0.5893 |
age_at_last_child | 1.669 | 1.658 | 1.681 |
birth_cohort1950M1955 | 1.021 | 1.01 | 1.032 |
birth_cohort1955M1960 | 1.065 | 1.053 | 1.077 |
male1 | 1 | 0.9921 | 1.009 |
maternalage.factor1420 | 0.9871 | 0.9716 | 1.002 |
maternalage.factor3561 | 1.01 | 0.9954 | 1.024 |
paternalage.mean | 1.023 | 0.9964 | 1.051 |
paternal_loss01 | 0.8711 | 0.5735 | 1.291 |
paternal_loss15 | 0.9709 | 0.8604 | 1.09 |
paternal_loss510 | 0.9806 | 0.9247 | 1.037 |
paternal_loss1015 | 0.9847 | 0.9511 | 1.02 |
paternal_loss1520 | 0.9804 | 0.9557 | 1.007 |
paternal_loss2025 | 0.9778 | 0.9578 | 0.9983 |
paternal_loss2530 | 0.9852 | 0.9684 | 1.003 |
paternal_loss3035 | 0.9898 | 0.9738 | 1.006 |
paternal_loss3540 | 0.9897 | 0.9751 | 1.005 |
paternal_loss4045 | 0.9915 | 0.9762 | 1.006 |
paternal_lossunclear | 1.009 | 0.9981 | 1.02 |
maternal_loss01 | 0.9444 | 0.5949 | 1.429 |
maternal_loss15 | 1.022 | 0.8448 | 1.227 |
maternal_loss510 | 1.012 | 0.93 | 1.096 |
maternal_loss1015 | 1.003 | 0.9495 | 1.059 |
maternal_loss1520 | 0.9896 | 0.9518 | 1.028 |
maternal_loss2025 | 0.9951 | 0.9646 | 1.027 |
maternal_loss2530 | 0.9931 | 0.9661 | 1.02 |
maternal_loss3035 | 1.003 | 0.9799 | 1.027 |
maternal_loss3540 | 0.9902 | 0.971 | 1.01 |
maternal_loss4045 | 0.9954 | 0.977 | 1.015 |
maternal_lossunclear | 1.006 | 0.9959 | 1.016 |
older_siblings1 | 1.001 | 0.9876 | 1.015 |
older_siblings2 | 0.9997 | 0.9772 | 1.023 |
older_siblings3 | 0.9999 | 0.9666 | 1.033 |
older_siblings4 | 0.9895 | 0.9457 | 1.034 |
older_siblings5P | 0.9688 | 0.9144 | 1.024 |
nr.siblings | 1.01 | 1.005 | 1.015 |
last_born1 | 1.003 | 0.9942 | 1.013 |
pander::pander(paternal_age_10y_effect(model))
effect | median_estimate | ci_95 | ci_80 |
---|---|---|---|
estimate father 25y | 2.22 | [2.19;2.26] | [2.2;2.25] |
estimate father 35y | 2.22 | [2.16;2.27] | [2.18;2.25] |
percentage change | -0.39 | [-2.97;2.34] | [-2.11;1.38] |
OR/IRR | 1 | [0.97;1.02] | [0.98;1.01] |
In these marginal effect plots, we set all predictors except the one shown on the X axis to their mean and in the case of factors to their reference level. We then plot the estimated association between the X axis predictor and the outcome on the response scale (e.g. probability of survival/marriage or number of children).
plot.brmsMarginalEffects_shades(
x = marginal_effects(model, re_formula = NA, probs = c(0.025,0.975)),
y = marginal_effects(model, re_formula = NA, probs = c(0.1,0.9)),
ask = FALSE)
Here, we plotted the 95% posterior densities for the unexponentiated model coefficients (b_
). The darkly shaded area represents the 50% credibility interval, the dark line represent the posterior mean estimate.
mcmc_areas(as.matrix(model$fit), regex_pars = "b_[^I]", point_est = "mean", prob = 0.50, prob_outer = 0.95) + ggtitle("Posterior densities with means and 50% intervals") + analysis_theme + theme(axis.text = element_text(size = 12), panel.grid = element_blank()) + xlab("Coefficient size")
These plots were made to diagnose misfit and nonconvergence.
In posterior predictive checks, we test whether we can approximately reproduce the real data distribution from our model.
brms::pp_check(model, re_formula = NA, type = "dens_overlay")
brms::pp_check(model, re_formula = NA, type = "hist")
Did the 6 chains converge?
stanplot(model, pars = "^b_[^I]", type = 'rhat')
stanplot(model, pars = "^b", type = 'neff_hist')
Trace plots are only shown in the case of nonconvergence.
if(any( summary(model)$fixed[,"Rhat"] > 1.1)) { # only do traceplots if not converged
plot(model, N = 3, ask = FALSE)
}
This model was stored in the file: coefs/swed/s2_reproductive_timing.rds.
Click the following link to see the script used to generate this model:
opts_chunk$set(echo = FALSE)
clusterscript = str_replace(basename(model_filename), "\\.rds",".html")
cat("[Cluster script](" , clusterscript, ")", sep = "")