Loading details
source("0__helpers.R")
opts_chunk$set(render = pander_handler, cache=F,tidy=FALSE,autodep=TRUE,dev='png',fig.width=12,fig.height=7.5, warning = F, message = F)
load("rpqa.rdata")
rpqa = data.table(rpqa); rpqa.1 = data.table(rpqa.1)
desc_theme = theme_minimal(base_size = 24) + theme( axis.line.y = element_blank(), axis.line.x = element_line(size = 1, color ="black"))
update_geom_defaults("bar", list(fill = "#6c92b2", alpha = 1/2))
demo_trends = aggDemoTrends(rpqa)
mymin = theme_minimal() +theme(panel.grid.major.y =element_blank(),panel.grid.major.x = element_line(colour="#eeeeee"))
rpqa[, paternalage := 10 * paternalage]
rpqa[, maternalage := 10 * maternalage]
rpqa[, age := 10 * age]
rpqa[, age_at_1st_child := 10 * age_at_1st_child]
rpqa[, age_at_last_child := 10 * age_at_last_child]
rpqa[, byear := year(bdate)]
rpqa.1[, paternalage := 10 * paternalage]
rpqa.1[, maternalage := 10 * maternalage]
rpqa.1[, age := 10 * age]
rpqa.1[, age_at_1st_child := 10 * age_at_1st_child]
rpqa.1[, age_at_last_child := 10 * age_at_last_child]
rpqa.1[, byear := year(bdate)]
Missingness patterns
The first table shows the number of missings per variable, the second table, using the indexes from the first, shows missings in which variables tend to occur together. Most variables of interest in this study are derived from these dates and so these patterns can show many cases did not have the data to calculate e.g. paternal loss (those lacking either the father’s death date, the anchor’s birth date or both).
pander_escape(missingness_patterns(rpqa[, list(
bdate, ddate, bdate.Father, ddate.Father, bdate.Mother, ddate.Mother
)]))
## index col missings
## 1 ddate 214777
## 2 ddate.Mother 139108
## 3 ddate.Father 131403
## 4 bdate.Father 57833
## 5 bdate.Mother 54978
## 6 bdate 31906
___________ |
166503 |
_ |
1__________ |
102985 |
ddate |
1_2________ |
28009 |
|
1_2_3_4_5_6 |
24048 |
|
1_2_3______ |
22747 |
|
____3______ |
21009 |
ddate.Father |
__2________ |
19750 |
ddate.Mother |
1___3______ |
17574 |
|
__2_3_4_5__ |
13719 |
|
__2_3______ |
11033 |
|
1_2_3_4_5__ |
6846 |
|
__2_3_4_5_6 |
2697 |
|
____3_4____ |
2465 |
|
1_2_3_4____ |
2198 |
|
1_2_____5__ |
2155 |
|
1___3_4____ |
2129 |
|
1_________6 |
1821 |
|
1_2_3___5__ |
1819 |
|
__________6 |
1678 |
bdate |
______4____ |
1131 |
bdate.Father |
__2_3_4____ |
1068 |
|
________5__ |
985 |
bdate.Mother |
__2_____5__ |
976 |
|
__2_3___5__ |
697 |
|
1_____4____ |
648 |
|
1_______5__ |
532 |
|
__2_______6 |
251 |
|
____3_____6 |
249 |
|
1_2___4____ |
216 |
|
1___3_____6 |
212 |
|
1_2_______6 |
181 |
|
__2_3_____6 |
154 |
|
1___3_4___6 |
146 |
|
__2___4____ |
138 |
|
1_2_3_____6 |
114 |
|
____3___5__ |
96 |
|
1___3___5__ |
95 |
|
1_2_3_4___6 |
67 |
|
____3_4___6 |
63 |
|
1_2_____5_6 |
52 |
|
1___3_4_5__ |
42 |
|
1_2___4_5__ |
39 |
|
1_2_3___5_6 |
33 |
|
____3_4_5__ |
33 |
|
__2___4_5__ |
30 |
|
__2_3_4___6 |
28 |
|
1_____4___6 |
18 |
|
______4___6 |
17 |
|
1_______5_6 |
14 |
|
1_____4_5__ |
13 |
|
__2_____5_6 |
12 |
|
__2_3___5_6 |
11 |
|
________5_6 |
9 |
|
1_2___4_5_6 |
8 |
|
1_2___4___6 |
8 |
|
1___3_4_5_6 |
8 |
|
______4_5__ |
5 |
|
__2___4___6 |
3 |
|
____3___5_6 |
2 |
|
__2___4_5_6 |
1 |
|
____3_4_5_6 |
1 |
|
Reproductive timing
ggplot(data = demo_trends) +
geom_line(aes(x= Year, y = first, linetype = "first", colour = Parent), size = 1) +
geom_line(aes(x = Year, y = all, linetype = "all", colour = Parent), size = 1) +
geom_line(aes(x= Year, y = last, linetype = "last", colour = Parent),size = 1) +
scale_colour_manual(values = c(Father = "#6c92b2", Mother = "#aec05d")) +
scale_linetype_manual("Birth", breaks = c("last", "all","first"), values = c( "solid","dashed", "dotted")) +
scale_y_continuous("Parental age at birth") +
xlim(1650,NA) +
geom_text(aes(x = Year, y = all + 0.5,
label = ifelse(Year %% 15 == 0, round(all), NA))) +
facet_wrap(~ Parent) +
desc_theme + theme(legend.position = c(1,1),
legend.justification = c(1,1),
legend.box = "horizontal",
panel.margin = unit(2, "lines"))

Correlations between variables
round(cor(rpqa[, list(
paternalage, maternalage, birthorder, nr.siblings, children, grandchildren, byear, byear.Father, age_at_1st_child, age_at_last_child
)], use = "pairwise.complete.obs"),2)
paternalage |
1 |
0.63 |
0.61 |
0.21 |
-0.01 |
0.02 |
-0.05 |
-0.31 |
0.03 |
0.02 |
maternalage |
0.63 |
1 |
0.72 |
0.17 |
-0.04 |
-0.04 |
0.05 |
-0.11 |
0.01 |
-0.02 |
birthorder |
0.61 |
0.72 |
1 |
0.59 |
-0.07 |
-0.06 |
0.11 |
-0.1 |
-0.07 |
-0.06 |
nr.siblings |
0.21 |
0.17 |
0.59 |
1 |
0.02 |
-0.02 |
-0.02 |
-0.18 |
-0.11 |
-0.1 |
children |
-0.01 |
-0.04 |
-0.07 |
0.02 |
1 |
0.61 |
-0.44 |
-0.43 |
-0.17 |
0.7 |
grandchildren |
0.02 |
-0.04 |
-0.06 |
-0.02 |
0.61 |
1 |
-0.49 |
-0.46 |
-0.06 |
0.41 |
byear |
-0.05 |
0.05 |
0.11 |
-0.02 |
-0.44 |
-0.49 |
1 |
0.96 |
-0.2 |
-0.45 |
byear.Father |
-0.31 |
-0.11 |
-0.1 |
-0.18 |
-0.43 |
-0.46 |
0.96 |
1 |
-0.12 |
-0.44 |
age_at_1st_child |
0.03 |
0.01 |
-0.07 |
-0.11 |
-0.17 |
-0.06 |
-0.2 |
-0.12 |
1 |
0.47 |
age_at_last_child |
0.02 |
-0.02 |
-0.06 |
-0.1 |
0.7 |
0.41 |
-0.45 |
-0.44 |
0.47 |
1 |
ggplot(data=rpqa, aes(x = byear, y = paternalage)) +
geom_linerange(stat = "summary", fun.data = "median_hilow", colour = "#aec05d") +
geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#6c92b2") +
desc_theme

ggplot(data=rpqa, aes(x = byear, y = age_at_1st_child)) +
geom_linerange(stat = "summary", fun.data = "median_hilow", colour = "#aec05d") +
geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#6c92b2") +
desc_theme

ggplot(data=rpqa, aes(x = byear, y = age_at_last_child)) +
geom_linerange(stat = "summary", fun.data = "median_hilow", colour = "#aec05d") +
geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#6c92b2") +
desc_theme

ggplot(data=rpqa, aes(x = byear, y = children)) +
geom_linerange(stat = "summary", fun.data = "median_hilow", colour = "#aec05d") +
geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#6c92b2") +
desc_theme

ggplot(data=rpqa, aes(x = byear, y = survive1y)) +
geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#aec05d") +
desc_theme

ggplot(data=rpqa, aes(x = byear, y = surviveR)) +
geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#aec05d") +
desc_theme

ggplot(data=rpqa, aes(x = children, y = grandchildren)) +
geom_jitter(colour = "#aec05d", alpha = I(0.1)) +
geom_pointrange(stat = "summary", fun.data = "median_hilow", colour = "#6c92b2") +
geom_smooth(method = "glm", formula = y ~ poly(x,3), colour = "#6e85b0") +
desc_theme

ggplot(data=rpqa, aes(x = children, y = children.surviving5y)) +
geom_jitter(colour = "#aec05d", alpha = I(0.1)) +
geom_pointrange(stat = "summary", fun.data = "median_hilow", colour = "#6c92b2") +
geom_smooth(method = "glm", formula = y ~ poly(x,3), colour = "#6e85b0") +
desc_theme

ggplot(data=rpqa, aes(x = round(age), y = children)) +
geom_jitter(colour = "#aec05d", alpha = I(0.1)) +
geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#6c92b2") +
geom_smooth(colour = "#6e85b0") +
xlab("Age") +
ylab("Number of children") +
desc_theme

ggplot(data=rpqa[children>0,], aes(x = round(age), y = children)) +
geom_jitter(colour = "#aec05d", alpha = I(0.1)) +
geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#6c92b2") +
geom_smooth(colour = "#6e85b0") +
xlab("Age") +
ylab("Number of children") +
desc_theme

plot_zero_infl(rpqa[ spouses > 0, ]$children)

ggplot(data=rpqa, aes(x = paternalage.factor, y = survive1y)) +
geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#aec05d") +
desc_theme

ggplot(data=rpqa[spouses > 0, ], aes(x = paternalage.factor, y = children)) +
geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#aec05d") +
desc_theme

Opportunities for selection
rpqa.1$birth.decade = round(rpqa.1$byear/5)*5
episodes = rpqa.1 %>%
filter(!is.na(male) | !is.na(survive1y) | !is.na(ever_married)) %>%
group_by(birth.decade) %>%
summarise(
"Population size" = as.numeric(length(idIndividu)),
"0. Number of children" = ifelse(between(birth.decade, 1670,1750), cva(children), NA_real_ ),
"1. Surviving first year" = ifelse(between(birth.decade, 1670, 1755),cva_bin(survive1y), NA_real_ ),
"2. Surviving to 15" = ifelse(between(birth.decade, 1670,1735), cva_bin(surviveR[survive1y==T]), NA_real_ ),
"3. Ever married" = ifelse(between(birth.decade, 1670,1720), cva_bin(ever_married[surviveR==1]), NA_real_ ),
"4. Number of children" = ifelse(between(birth.decade, 1670,1750), cva(children[ever_married==1]), NA_real_ ),
"5. Grandchildren" = ifelse(between(birth.decade, 1670,1670), cva(grandchildren[children>0]), NA_real_ )
) %>%
# mutate(male = Recode(male, "'NO'='female';''='male'")) %>%
data.table()
data.frame(episodes[order(birth.decade), ], check.names = F)
1670 |
1362 |
1.16 |
0.3481 |
0.206 |
0.4818 |
0.5963 |
0.7589 |
1675 |
2575 |
1.152 |
0.2827 |
0.2802 |
0.4755 |
0.5918 |
NA |
1680 |
2598 |
1.194 |
0.3456 |
0.3456 |
0.492 |
0.5668 |
NA |
1685 |
2621 |
1.225 |
0.4315 |
0.3557 |
0.4831 |
0.5743 |
NA |
1690 |
3072 |
1.186 |
0.4459 |
0.3217 |
0.489 |
0.5675 |
NA |
1695 |
3793 |
1.152 |
0.3861 |
0.3186 |
0.4767 |
0.5889 |
NA |
1700 |
5025 |
1.232 |
0.4737 |
0.3785 |
0.477 |
0.5806 |
NA |
1705 |
5181 |
1.184 |
0.4266 |
0.3267 |
0.4665 |
0.604 |
NA |
1710 |
5771 |
1.219 |
0.4166 |
0.3762 |
0.4767 |
0.6196 |
NA |
1715 |
6337 |
1.314 |
0.5281 |
0.3753 |
0.4776 |
0.6222 |
NA |
1720 |
7184 |
1.246 |
0.447 |
0.3729 |
0.4605 |
0.6389 |
NA |
1725 |
8520 |
1.373 |
0.4917 |
0.4278 |
NA |
0.669 |
NA |
1730 |
10200 |
1.55 |
0.5842 |
0.4982 |
NA |
0.6784 |
NA |
1735 |
11112 |
1.463 |
0.5627 |
0.3746 |
NA |
0.696 |
NA |
1740 |
4544 |
1.386 |
0.5384 |
NA |
NA |
0.6639 |
NA |
save(episodes, file = "coefs/rpqa_episodes.rdata")
# rpqa.1 = merge(rpqa.1, episodes, by = "birth.decade", all.x = T)
(episodes.plot = ggplot(melt(episodes,id.vars=c('birth.decade'), na.rm = T)) + geom_line(aes(x=birth.decade, y=value)) + facet_wrap(~ variable,scales='free_y',ncol = 1)) + mymin
