Krummhörn descriptives

Loading details

source("0__helpers.R")
opts_chunk$set(render = pander_handler, tidy=FALSE, autodep=TRUE, dev='png', fig.width=12, fig.height=7.5, cache = FALSE, warning = FALSE, message = FALSE)
load("krmh.rdata")
desc_theme = theme_minimal(base_size = 24) 
update_geom_defaults("bar",   list(fill = "#6c92b2", alpha = 1/2))
demo_trends = aggDemoTrends(krmh)

mymin = theme_minimal() +theme(panel.grid.major.y =element_blank(),panel.grid.major.x = element_line(colour="#eeeeee"))

krmh[, paternalage := 10 * paternalage]; krmh.1[, paternalage := 10 * paternalage]
## Error in `:=`(paternalage, 10 * paternalage): Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=").
krmh[, maternalage := 10 * maternalage]; krmh.1[, maternalage := 10 * maternalage]
## Error in `:=`(maternalage, 10 * maternalage): Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=").
krmh[, age := 10 * age]; krmh.1[, age := 10 * age]
## Error in `:=`(age, 10 * age): Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=").
krmh[, age_at_1st_child := 10 * age_at_1st_child]; krmh.1[, age_at_1st_child := 10 * age_at_1st_child]
## Error in `:=`(age_at_1st_child, 10 * age_at_1st_child): Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=").
krmh[, age_at_last_child := 10 * age_at_last_child]; krmh.1[, age_at_last_child := 10 * age_at_last_child]
## Error in `:=`(age_at_last_child, 10 * age_at_last_child): Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=").
krmh[, byear := year(bdate)]; krmh.1[, byear := year(bdate)]
## Error in `:=`(byear, year(bdate)): Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=").
krmh[, byear.Father := year(bdate.Father)];krmh.1[, byear.Father := year(bdate.Father)]
## Error in `:=`(byear.Father, year(bdate.Father)): Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=").

Variable descriptives

The following statistics refer to all births. This means that e.g. the percentage of ever married people includes some who died before they had a chance at marriage.

Whole population (N = 80808)

descriptives = psych::describe(krmh[, list(
    paternalage, maternalage, nr.siblings, dependent_sibs_f5y, age, spouses, children, grandchildren, byear, byear.Father, age_at_1st_child, age_at_last_child  )])
round(data.frame(descriptives)[,2:12],2)
  n mean sd median trimmed mad min max range skew kurtosis
paternalage 35907 34.97 7.35 34.09 34.5 7.39 16.07 74.48 58.4 0.65 0.45
maternalage 36371 31.49 5.97 31.15 31.36 6.63 14.62 54.62 40.01 0.18 -0.65
nr.siblings 80808 3.94 2.91 4 3.76 2.97 0 17 17 0.46 -0.17
dependent_sibs_f5y 80808 1.74 1.39 2 1.62 1.48 0 14 14 0.82 1.76
age 24973 20.03 26.2 5.29 15.38 7.79 0 100.8 100.8 1.21 0.07
spouses 80808 0.36 0.57 0 0.27 0 0 5 5 1.48 2.08
children 80808 1.12 2.36 0 0.5 0 0 20 20 2.27 4.74
grandchildren 80808 1.26 4.21 0 0.09 0 0 62 62 4.38 23.1
byear 75018 1796 55.27 1800 1798 63.75 1572 1932 360 -0.29 -0.6
byear.Father 52616 1776 46.25 1779 1778 53.37 1544 1889 345 -0.27 -0.46
age_at_1st_child 15943 27.8 5.2 27.04 27.32 4.51 14.62 67.64 53.03 1.2 3.01
age_at_last_child 15943 37.28 7.48 37.85 37.28 7.32 14.62 74.48 59.86 0.1 0.23
describeBin(krmh[, list(survive1y, surviveR, ever_married)])
  n mean sd
survive1y 57490 0.87 0.11
surviveR 57490 0.73 0.2
ever_married 80808 0.32 0.22
pander(xtabs(~ paternal_loss, krmh), caption = "Paternal loss at age")
Paternal loss at age
later [0,1] (1,5] (5,10] (10,15] (15,20] (20,25] (25,30] (30,35] (35,40] (40,45] unclear
5230 1448 3332 4638 4937 5164 5238 5302 5006 4353 2854 33306
pander(xtabs(~ maternal_loss, krmh), caption = "Maternal loss at age")
Maternal loss at age
later [0,1] (1,5] (5,10] (10,15] (15,20] (20,25] (25,30] (30,35] (35,40] (40,45] unclear
8567 1822 3297 4023 3853 3675 3919 4374 4768 5079 3739 33692

included sample (N = 14034)

descriptives = psych::describe(krmh.1[, list(
    paternalage, maternalage, nr.siblings, dependent_sibs_f5y, age, spouses, children, grandchildren, byear, byear.Father, age_at_1st_child, age_at_last_child  )])
## Error in `[.data.frame`(krmh.1, , list(paternalage, maternalage, nr.siblings, : object 'paternalage' not found
round(data.frame(descriptives)[,2:12],2)
  n mean sd median trimmed mad min max range skew kurtosis
paternalage 35907 34.97 7.35 34.09 34.5 7.39 16.07 74.48 58.4 0.65 0.45
maternalage 36371 31.49 5.97 31.15 31.36 6.63 14.62 54.62 40.01 0.18 -0.65
nr.siblings 80808 3.94 2.91 4 3.76 2.97 0 17 17 0.46 -0.17
dependent_sibs_f5y 80808 1.74 1.39 2 1.62 1.48 0 14 14 0.82 1.76
age 24973 20.03 26.2 5.29 15.38 7.79 0 100.8 100.8 1.21 0.07
spouses 80808 0.36 0.57 0 0.27 0 0 5 5 1.48 2.08
children 80808 1.12 2.36 0 0.5 0 0 20 20 2.27 4.74
grandchildren 80808 1.26 4.21 0 0.09 0 0 62 62 4.38 23.1
byear 75018 1796 55.27 1800 1798 63.75 1572 1932 360 -0.29 -0.6
byear.Father 52616 1776 46.25 1779 1778 53.37 1544 1889 345 -0.27 -0.46
age_at_1st_child 15943 27.8 5.2 27.04 27.32 4.51 14.62 67.64 53.03 1.2 3.01
age_at_last_child 15943 37.28 7.48 37.85 37.28 7.32 14.62 74.48 59.86 0.1 0.23
describeBin(krmh.1[, list(survive1y, surviveR, ever_married)])
## Error in `[.data.frame`(krmh.1, , list(survive1y, surviveR, ever_married)): object 'survive1y' not found
pander(xtabs(~ paternal_loss, krmh.1), caption = "Paternal loss at age")
Paternal loss at age
later [0,1] (1,5] (5,10] (10,15] (15,20] (20,25] (25,30] (30,35] (35,40] (40,45] unclear
1967 306 725 1074 1195 1350 1495 1623 1658 1578 1063 0
pander(xtabs(~ maternal_loss, krmh.1), caption = "Maternal loss at age")
Maternal loss at age
later [0,1] (1,5] (5,10] (10,15] (15,20] (20,25] (25,30] (30,35] (35,40] (40,45] unclear
3037 429 846 1044 966 900 1012 1308 1482 1689 1321 0

Number of families with varying numbers of siblings available for comparison

crosstabs(krmh[!duplicated(idParents), ]$nr.siblings)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
13433 3467 2551 2292 1948 1623 1170 824 480 257 98 50 27 19 5 3 2 1
qplot(krmh[!duplicated(idParents), ]$nr.siblings, binwidth = 1) + xlab("Number of siblings per family") + desc_theme

Missingness patterns

The first table shows the number of missings per variable, the second table, using the indexes from the first, shows missings in which variables tend to occur together. Most variables of interest in this study are derived from these dates and so these patterns can show many cases did not have the data to calculate e.g. paternal loss (those lacking either the father’s death date, the anchor’s birth date or both).

pander_escape(missingness_patterns(krmh[, list(
    bdate, ddate, bdate.Father, ddate.Father, bdate.Mother, ddate.Mother
    )]))
##  index          col missings
##      1        ddate    52229
##      2 ddate.Mother    32025
##      3 ddate.Father    31617
##      4 bdate.Father    28192
##      5 bdate.Mother    27849
##      6        bdate     5790
Pattern Freq Culprit
1__________ 19733 ddate
___________ 12819 _
1_2_3_4_5__ 8735
1_2_3______ 5237
__2_3_4_5__ 2732
1_____4_5__ 2350
1_2________ 2335
1___3______ 2087
__2_3______ 2060
__2_3_4_5_6 2013
1_2_3_4_5_6 1549
1_____4____ 1526
__2________ 1394 ddate.Mother
____3______ 1310 ddate.Father
1_______5__ 1292
1_2___4_5__ 1211
1___3_4____ 1134
1_2_____5__ 1079
______4_5__ 1070
1___3_4_5__ 1008
________5__ 864 bdate.Mother
______4____ 817 bdate.Father
1_2_3___5__ 797
1_2_3_4____ 757
____3_4____ 580
__2_____5__ 452
__2___4_5__ 372
______4_5_6 358
____3_4_5__ 327
__2_3_4____ 277
1_____4_5_6 274
__2_3___5__ 254
1_2___4_5_6 222
__________6 158 bdate
1___3_4_5_6 156
____3_4_5_6 154
1_________6 148
1___3___5__ 145
1_2___4____ 139
__2___4_5_6 118
______4___6 96
________5_6 83
____3___5__ 63
__2___4____ 62
1_2_____5_6 54
1___3_4___6 53
1_______5_6 53
1_____4___6 35
__2_____5_6 35
1_2_3_____6 34
____3_4___6 33
1___3_____6 31
____3_____6 19
__2_______6 18
1_2_______6 17
1_2_3_4___6 16
__2_3_____6 16
1_2_3___5_6 14
__2_3_4___6 11
__2_3___5_6 8
1___3___5_6 5
__2___4___6 4
1_2___4___6 3
____3___5_6 2

Reproductive timing

ggplot(data = demo_trends[Year > 1680 & Year< 1900,]) + 
    geom_line(aes(x= Year, y = first, linetype = "first", colour = Parent),  size = 1) + 
  geom_line(aes(x = Year, y = all, linetype = "all", colour = Parent), size = 1) +
    geom_line(aes(x= Year, y = last, linetype = "last", colour = Parent),size = 1) + 
    scale_colour_manual(values = c(Father = "#6c92b2", Mother = "#aec05d")) + 
    scale_linetype_manual("Birth", breaks = c("last", "all","first"), values = c( "solid","dashed", "dotted")) +
    scale_y_continuous("Parental age at birth") + 
        geom_text(aes(x = Year, y = all + 0.5, 
                                    label = ifelse(Year %% 15 == 0, round(all), NA))) + 
    facet_wrap(~ Parent) +
    desc_theme + theme(legend.position = c(1,1),
  legend.justification = c(1,1), 
  legend.box = "horizontal",
  panel.margin = unit(2, "lines"))

Correlations between variables

round(cor(krmh[, list(
    paternalage, maternalage, birthorder, nr.siblings, children, grandchildren, byear, byear.Father, age_at_1st_child, age_at_last_child
    )], use = "pairwise.complete.obs"),2)
  paternalage maternalage birthorder nr.siblings children grandchildren byear byear.Father age_at_1st_child age_at_last_child
paternalage 1 0.63 0.56 0.14 -0.04 -0.02 0.05 -0.12 0.04 0
maternalage 0.63 1 0.64 0.1 -0.04 -0.03 0.11 0 0.03 0
birthorder 0.56 0.64 1 0.67 -0.03 -0.02 0.1 -0.04 0.02 0.02
nr.siblings 0.14 0.1 0.67 1 0.01 0 0.04 -0.07 0.02 0.05
children -0.04 -0.04 -0.03 0.01 1 0.64 -0.09 -0.15 -0.25 0.64
grandchildren -0.02 -0.03 -0.02 0 0.64 1 -0.14 -0.21 -0.14 0.32
byear 0.05 0.11 0.1 0.04 -0.09 -0.14 1 0.99 -0.04 -0.19
byear.Father -0.12 0 -0.04 -0.07 -0.15 -0.21 0.99 1 -0.08 -0.21
age_at_1st_child 0.04 0.03 0.02 0.02 -0.25 -0.14 -0.04 -0.08 1 0.44
age_at_last_child 0 0 0.02 0.05 0.64 0.32 -0.19 -0.21 0.44 1
ggplot(data=krmh, aes(x = byear, y = paternalage)) +
    geom_linerange(stat = "summary", fun.data = "median_hilow", colour = "#aec05d") +
    geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#6c92b2") +
    desc_theme

ggplot(data=krmh, aes(x = byear, y = age_at_1st_child)) +
    geom_linerange(stat = "summary", fun.data = "median_hilow", colour = "#aec05d") +
    geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#6c92b2") +
    desc_theme

ggplot(data=krmh, aes(x = byear, y = age_at_last_child)) +
    geom_linerange(stat = "summary", fun.data = "median_hilow", colour = "#aec05d") +
    geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#6c92b2") +
    desc_theme

ggplot(data=krmh, aes(x = byear, y = children)) + 
    geom_linerange(stat = "summary", fun.data = "median_hilow", colour = "#aec05d", na.rm=T) + 
    geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#6c92b2") + 

    desc_theme

ggplot(data=krmh, aes(x = byear, y = survive1y)) + 
    geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#aec05d") + 

    desc_theme

ggplot(data=krmh, aes(x = byear, y = surviveR)) + 
    geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#aec05d") + 

    desc_theme

ggplot(data=krmh, aes(x = children, y = grandchildren)) + 
    geom_jitter(colour = "#aec05d", alpha = I(0.1)) + 
    geom_pointrange(stat = "summary", fun.data = "median_hilow", colour = "#6c92b2") +
    geom_smooth(method = "glm", formula = y ~ poly(x,3), colour = "#6e85b0") + 
    desc_theme

ggplot(data=krmh, aes(x = children, y = children.surviving5y)) + 
    geom_jitter(colour = "#aec05d", alpha = I(0.1)) + 
    geom_pointrange(stat = "summary", fun.data = "median_hilow", colour = "#6c92b2") +
    geom_smooth(method = "glm", formula = y ~ poly(x,3), colour = "#6e85b0") + 
    desc_theme

ggplot(data=krmh, aes(x = round(age), y = children)) + 
    geom_jitter(colour = "#aec05d", alpha = I(0.1)) + 
    geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#6c92b2") +
    geom_smooth(colour = "#6e85b0") + 
    xlab("Age") +
    ylab("Number of children") +
    desc_theme

ggplot(data=krmh[children>0,], aes(x = round(age), y = children)) + 
    geom_jitter(colour = "#aec05d", alpha = I(0.1)) + 
    geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#6c92b2") +
    geom_smooth(colour = "#6e85b0") + 
    xlab("Age") +
    ylab("Number of children") +
    desc_theme

plot_zero_infl(krmh[ spouses > 0, ]$children)

ggplot(data=krmh, aes(x = paternalage.factor, y = survive1y)) + 
    geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#aec05d") + 
    desc_theme

ggplot(data=krmh[spouses > 0, ], aes(x = paternalage.factor, y = children)) + 
    geom_pointrange(stat = "summary", fun.data = "mean_cl_boot", colour = "#aec05d") + 
    desc_theme

Opportunities for selection

krmh.1$birth.decade = round(krmh.1$byear/5)*5


episodes = krmh.1 %>%
    filter(!is.na(male) | !is.na(survive1y) | !is.na(ever_married)) %>% 
    group_by(birth.decade) %>%
    summarise(
        "Population size" = as.numeric(length(idIndividu)),
        "0. Number of children" = ifelse(between(birth.decade, 1655,1840), cva(children), NA_real_ ), 
        "1. Surviving first year" = ifelse(between(birth.decade, 1655, 1870),cva_bin(survive1y), NA_real_ ),
        "2. Surviving to 15" = ifelse(between(birth.decade, 1655,1840), cva_bin(surviveR[survive1y==T]), NA_real_ ),
        "3. Ever married" = ifelse(between(birth.decade, 1655,1830), cva_bin(ever_married[surviveR==1]), NA_real_ ), 
        "4. Number of children" = ifelse(between(birth.decade, 1655,1840), cva(children[ever_married==1]), NA_real_ ), 
        "5. Grandchildren" = ifelse(between(birth.decade, 1655,1800), cva(grandchildren[children>0]), NA_real_ )
        ) %>%
    data.table()
## Error: expecting a single value
data.frame(episodes[order(birth.decade), ])
## Error in data.frame(episodes[order(birth.decade), ]): object 'episodes' not found
save(episodes, file = "coefs/krmh_episodes.rdata")
## Error in save(episodes, file = "coefs/krmh_episodes.rdata"): object 'episodes' not found
# krmh.1 = merge(krmh.1, episodes, by = "birth.decade", all.x = T)
(episodes.plot = ggplot(melt(episodes,id.vars=c('birth.decade'), na.rm = T)) + geom_line(aes(x=birth.decade, y=value)) + facet_wrap(~ variable,scales='free_y',ncol = 1)) + mymin
## Error in is.data.table(data): object 'episodes' not found

Sex ratio

(sex.ratio = krmh.1 %>% 
    filter(!is.na(male), byear > 1720) %>% 
    mutate(male = as.numeric(as.character(male))) %>%
    group_by(birth.decade) %>% 
    summarise(sex.ratio = sum(male)/length(male)) %>%
    data.frame()
)
birth.decade sex.ratio
1720 0.5
1725 0.5
1730 0.56
1735 0.5028
1740 0.5594
1745 0.5074
1750 0.5282
1755 0.5196
1760 0.5343
1765 0.5143
1770 0.5528
1775 0.5105
1780 0.5417
1785 0.529
1790 0.5267
1795 0.5012
1800 0.519
1805 0.525
1810 0.5281
1815 0.4946
1820 0.4784
1825 0.5014
1830 0.4714
1835 0.5112
ggplot(na.omit(sex.ratio)) + geom_line(aes(x=birth.decade, y=sex.ratio)) + mymin