Loading details
# bsub -q fat -W 48:00 -n 1 Rscript -e "setwd('/usr/users/rarslan/updated_data/'); filebase = '1_swed_descriptives'; knitr::knit(input = paste0(filebase,'.Rmd'), output = paste0(filebase,'.md'));cat(readLines(paste0(filebase,'.md')), sep = '\n')"
# Project helpers; presumably attach the packages and define the custom
# functions used below (pander_handler, aggDemoTrends, ...) -- TODO confirm.
source("0__helpers.R")

# knitr chunk defaults: values rendered through pander_handler, caching off,
# PNG figures at 12 x 7.5.
opts_chunk$set(
  render = pander_handler,
  cache = FALSE,
  cache.lazy = FALSE,
  tidy = FALSE,
  autodep = TRUE,
  dev = 'png',
  fig.width = 12,
  fig.height = 7.5
)

# Data sets; presumably these files provide `swed`, `swed.1` and `swed.2`
# (the objects used below) -- verify against the script that saved them.
load("swed.rdata")
load("swed1.rdata")
load("swed2.rdata")

# Demographic trends are aggregated BEFORE the age columns are rescaled below.
demo_trends = aggDemoTrends(swed)

# Shared plot styling.
desc_theme = theme_minimal(base_size = 24)
update_geom_defaults("bar", list(fill = "#6c92b2", alpha = 1/2))
mymin = theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(colour = "#eeeeee")
  )

# Multiply the four age variables by 10 in each table, by reference.
# NOTE(review): presumably the stored ages are in units of 10 years and this
# converts them back to years -- confirm against the data preparation step.
swed.1[, `:=`(
  paternalage = 10 * paternalage,
  maternalage = 10 * maternalage,
  age_at_1st_child = 10 * age_at_1st_child,
  age_at_last_child = 10 * age_at_last_child
)]
swed[, `:=`(
  paternalage = 10 * paternalage,
  maternalage = 10 * maternalage,
  age_at_1st_child = 10 * age_at_1st_child,
  age_at_last_child = 10 * age_at_last_child
)]
swed.2[, `:=`(
  paternalage = 10 * paternalage,
  maternalage = 10 * maternalage,
  age_at_1st_child = 10 * age_at_1st_child,
  age_at_last_child = 10 * age_at_last_child
)]
Missingness patterns
The first table shows the number of missing values per variable; the second table, using the indexes from the first, shows which variables tend to be missing together. Most variables of interest in this study are derived from these dates, so these patterns show how many cases lacked the data needed to calculate e.g. paternal loss (those lacking either the father’s death date, the anchor’s birth date or both).
# Missing-value patterns across the anchor's and both parents' birth and
# death years, plus education.
swed[, list(
  byear, dyear,
  byear.Father, dyear.Father,
  byear.Mother, dyear.Mother,
  education
)] %>%
  missingness_patterns() %>%
  pander_escape()
## index col missings
## 1 dyear 7849926
## 2 dyear.Mother 6084972
## 3 dyear.Father 5380461
## 4 education 2111673
## 5 byear.Mother 68294
## 6 byear.Father 11578
| Pattern | Frequency | Only missing variable |
|:--------------|----------:|:----------------------|
| `1_2_3______` | 2959092 | |
| `1_2_3_4____` | 1935537 | |
| `1__________` | 1504260 | dyear |
| `1_2________` | 966229 | |
| `1___3______` | 347415 | |
| `___________` | 172918 | `_` |
| `__2_3_4____` | 40504 | |
| `__2________` | 37058 | dyear.Mother |
| `1_2___4____` | 32376 | |
| `1_____4____` | 26614 | |
| `1_2_____5__` | 26592 | |
| `__2_3______` | 26072 | |
| `____3______` | 21513 | dyear.Father |
| `______4____` | 19844 | education |
| `__2___4____` | 14825 | |
| `1_2_3___5__` | 12523 | |
| `1___3_4____` | 12427 | |
| `1_2_3_4_5__` | 11054 | |
| `__2___4_5__` | 5946 | |
| `____3_4____` | 5926 | |
| `__2_____5__` | 5228 | |
| `1_2_______6` | 3018 | |
| `1_2_3_4___6` | 1984 | |
| `1_2_3_____6` | 1982 | |
| `1_________6` | 1819 | |
| `1___3___5__` | 1603 | |
| `1_2___4___6` | 1446 | |
| `1_2___4_5__` | 1389 | |
| `1_______5__` | 1096 | |
| `__2_3___5__` | 785 | |
| `__2_3_4_5__` | 715 | |
| `1___3_4_5__` | 599 | |
| `1___3_____6` | 290 | |
| `__________6` | 282 | byear.Father |
| `1_2_3_4_5_6` | 175 | |
| `________5__` | 150 | byear.Mother |
| `1_2_3___5_6` | 123 | |
| `1_2_____5_6` | 94 | |
| `1_2___4_5_6` | 64 | |
| `__2_______6` | 48 | |
| `1_____4_5__` | 39 | |
| `1_____4___6` | 38 | |
| `__2_3_4___6` | 37 | |
| `__2___4___6` | 36 | |
| `______4___6` | 27 | |
| `______4_5__` | 24 | |
| `____3___5__` | 23 | |
| `1_______5_6` | 22 | |
| `____3_____6` | 21 | |
| `__2_3_____6` | 19 | |
| `1___3_4___6` | 14 | |
| `____3_4_5__` | 14 | |
| `__2_____5_6` | 12 | |
| `1_____4_5_6` | 7 | |
| `__2_3_4_5_6` | 4 | |
| `1___3___5_6` | 3 | |
| `__2___4_5_6` | 3 | |
| `____3_4___6` | 3 | |
| `________5_6` | 3 | |
| `1___3_4_5_6` | 2 | |
| `__2_3___5_6` | 2 | |
Reproductive timing
# Parental age at first, all and last births by year (columns `first`, `all`
# and `last` of demo_trends), one panel and one colour per parent.
ggplot(data = demo_trends) +
  geom_line(aes(x = Year, y = first, linetype = "first", colour = Parent), size = 1) +
  geom_line(aes(x = Year, y = all, linetype = "all", colour = Parent), size = 1) +
  geom_line(aes(x = Year, y = last, linetype = "last", colour = Parent), size = 1) +
  scale_colour_manual(values = c(Father = "#6c92b2", Mother = "#aec05d")) +
  # Named values pin each birth type to its line type. Unnamed values are
  # paired with either the alphabetical limits or the breaks depending on the
  # ggplot2 version; the names follow the order in which breaks and values
  # were originally listed together.
  scale_linetype_manual(
    "Birth",
    breaks = c("last", "all", "first"),
    values = c(last = "solid", all = "dashed", first = "dotted")
  ) +
  scale_y_continuous("Parental age at birth", limits = c(23, 40)) +
  # Print the rounded `all` value slightly above its line every 15th year;
  # the other years get an NA label.
  geom_text(aes(x = Year, y = all + 0.31,
                label = ifelse(Year %% 15 == 0, round(all), NA))) +
  facet_wrap(~ Parent) +
  # Legend anchored in the top-right corner of the plotting area.
  # NOTE(review): `panel.margin` was renamed `panel.spacing` in later ggplot2
  # releases -- left unchanged to stay compatible with the version in use.
  desc_theme + theme(legend.position = c(1, 1),
                     legend.justification = c(1, 1),
                     legend.box = "horizontal",
                     panel.margin = unit(2, "lines"))
Correlations between variables
# Pairwise-complete correlations among the main study variables in swed.1,
# rounded to two decimals.
swed.1[, list(
  paternalage, maternalage, birthorder, nr.siblings, children,
  grandchildren, byear, byear.Father, age_at_1st_child, age_at_last_child
)] %>%
  cor(use = "pairwise.complete.obs") %>%
  round(2)
| | paternalage | maternalage | birthorder | nr.siblings | children | grandchildren | byear | byear.Father | age_at_1st_child | age_at_last_child |
|:--|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|
| paternalage | 1 | 0.77 | 0.43 | 0.13 | -0.03 | -0.03 | -0.06 | -0.89 | 0.06 | 0.03 |
| maternalage | 0.77 | 1 | 0.45 | 0.09 | -0.04 | -0.04 | -0.06 | -0.69 | 0.08 | 0.04 |
| birthorder | 0.43 | 0.45 | 1 | 0.7 | 0.04 | 0.05 | 0 | -0.37 | -0.06 | -0.03 |
| nr.siblings | 0.13 | 0.09 | 0.7 | 1 | 0.08 | 0.09 | -0.01 | -0.11 | -0.09 | -0.03 |
| children | -0.03 | -0.04 | 0.04 | 0.08 | 1 | 0.43 | -0.01 | 0.02 | -0.28 | 0.38 |
| grandchildren | -0.03 | -0.04 | 0.05 | 0.09 | 0.43 | 1 | -0.33 | -0.12 | -0.53 | -0.25 |
| byear | -0.06 | -0.06 | 0 | -0.01 | -0.01 | -0.33 | 1 | 0.51 | 0.13 | 0.09 |
| byear.Father | -0.89 | -0.69 | -0.37 | -0.11 | 0.02 | -0.12 | 0.51 | 1 | 0.01 | 0.01 |
| age_at_1st_child | 0.06 | 0.08 | -0.06 | -0.09 | -0.28 | -0.53 | 0.13 | 0.01 | 1 | 0.62 |
| age_at_last_child | 0.03 | 0.04 | -0.03 | -0.03 | 0.38 | -0.25 | 0.09 | 0.01 | 0.62 | 1 |
# Paternal age by birth year: mean_sdl range (green) behind the
# mean_cl_boot point-range (blue).
ggplot(swed, aes(byear, paternalage)) +
  geom_linerange(fun.data = "mean_sdl", stat = "summary", colour = "#aec05d") +
  geom_pointrange(fun.data = "mean_cl_boot", stat = "summary", colour = "#6c92b2") +
  desc_theme

# Age at first child by birth year: median_hilow range (green) behind the
# mean_cl_boot point-range (blue).
ggplot(swed, aes(byear, age_at_1st_child)) +
  geom_linerange(fun.data = "median_hilow", stat = "summary", colour = "#aec05d") +
  geom_pointrange(fun.data = "mean_cl_boot", stat = "summary", colour = "#6c92b2") +
  desc_theme

# Age at last child by birth year, same layers as above.
ggplot(swed, aes(byear, age_at_last_child)) +
  geom_linerange(fun.data = "median_hilow", stat = "summary", colour = "#aec05d") +
  geom_pointrange(fun.data = "mean_cl_boot", stat = "summary", colour = "#6c92b2") +
  desc_theme

# Number of children by birth year, same layers as above.
ggplot(swed, aes(byear, children)) +
  geom_linerange(fun.data = "median_hilow", stat = "summary", colour = "#aec05d") +
  geom_pointrange(fun.data = "mean_cl_boot", stat = "summary", colour = "#6c92b2") +
  desc_theme

# survive1y by birth year: mean_cl_boot point-range only.
ggplot(swed, aes(byear, survive1y)) +
  geom_pointrange(fun.data = "mean_cl_boot", stat = "summary", colour = "#aec05d") +
  desc_theme

# surviveR by birth year: mean_cl_boot point-range only.
ggplot(swed, aes(byear, surviveR)) +
  geom_pointrange(fun.data = "mean_cl_boot", stat = "summary", colour = "#aec05d") +
  desc_theme
# Grandchildren against children in swed.1: jittered raw points, the
# median_hilow point-range per number of children, and a cubic-polynomial
# glm smooth.
ggplot(swed.1, aes(children, grandchildren)) +
  geom_jitter(alpha = 0.1, colour = "#aec05d") +
  geom_pointrange(fun.data = "median_hilow", stat = "summary", colour = "#6c92b2") +
  geom_smooth(formula = y ~ poly(x, 3), method = "glm", colour = "#6e85b0") +
  desc_theme

# Cross-tabulate number of children against children surviving to age 5.
crosstabs(~ children + children.surviving5y, data = swed.1)
Rows: children; columns: children.surviving5y.

| children | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|
| 0 | 286116 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 2232 | 196720 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 553 | 3942 | 534577 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 33 | 439 | 8528 | 275295 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 4 | 39 | 580 | 5655 | 74696 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 5 | 0 | 0 | 33 | 374 | 2116 | 18419 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 6 | 0 | 0 | 1 | 29 | 157 | 677 | 5049 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 7 | 0 | 0 | 0 | 3 | 14 | 78 | 270 | 1508 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 8 | 0 | 0 | 0 | 0 | 4 | 5 | 31 | 108 | 522 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 9 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 11 | 39 | 192 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 8 | 23 | 99 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 5 | 5 | 42 | 0 | 0 | 0 | 0 | 0 | 0 |
| 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 3 | 18 | 0 | 0 | 0 | 0 | 0 |
| 13 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 4 | 5 | 0 | 0 | 0 | 0 |
| 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 |
| 15 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 3 | 0 | 0 |
| 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 |
| 17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 |
# Children surviving to age 5 against total children in swed.1: jittered raw
# points, the median_hilow point-range, and a cubic-polynomial glm smooth.
ggplot(swed.1, aes(children, children.surviving5y)) +
  geom_jitter(alpha = 0.1, colour = "#aec05d") +
  geom_pointrange(fun.data = "median_hilow", stat = "summary", colour = "#6c92b2") +
  geom_smooth(formula = y ~ poly(x, 3), method = "glm", colour = "#6e85b0") +
  desc_theme
# Number of children by rounded age, everyone in swed.1: jittered raw points,
# the mean_cl_boot point-range per age, and the default smoother.
ggplot(swed.1, aes(round(age), children)) +
  geom_jitter(alpha = 0.1, colour = "#aec05d") +
  geom_pointrange(fun.data = "mean_cl_boot", stat = "summary", colour = "#6c92b2") +
  geom_smooth(colour = "#6e85b0") +
  labs(x = "Age", y = "Number of children") +
  desc_theme

# Same plot restricted to individuals with at least one child.
ggplot(swed.1[children > 0], aes(round(age), children)) +
  geom_jitter(alpha = 0.1, colour = "#aec05d") +
  geom_pointrange(fun.data = "mean_cl_boot", stat = "summary", colour = "#6c92b2") +
  geom_smooth(colour = "#6e85b0") +
  labs(x = "Age", y = "Number of children") +
  desc_theme
# Zero-inflation plot for the number of children among individuals with at
# least one spouse.
plot_zero_infl(swed.1[spouses > 0]$children)

# survive1y by paternal age category (swed.2): mean_cl_boot point-range.
ggplot(swed.2, aes(paternalage.factor, survive1y)) +
  geom_pointrange(fun.data = "mean_cl_boot", stat = "summary", colour = "#aec05d") +
  desc_theme

# Number of children by paternal age category among individuals with at
# least one spouse (swed.1): mean_cl_boot point-range.
ggplot(swed.1[spouses > 0], aes(paternalage.factor, children)) +
  geom_pointrange(fun.data = "mean_cl_boot", stat = "summary", colour = "#aec05d") +
  desc_theme
Opportunities for selection
# 1 if the individual has at least one child, 0 otherwise (NA stays NA).
swed$any_children = ifelse(swed$children > 0, 1, 0)

# TRUE when the birth year of a group lies in [from, to] (bounds inclusive).
# Only the first element is inspected because every row of a byear group
# carries the same year; an NA year counts as "not in range".
born_between = function(year, from, to) {
  isTRUE(year[1] >= from && year[1] <= to)
}

# Per birth cohort: population size plus the cva()/cva_bin() statistic for
# each selection episode. Every statistic is only computed for the cohort
# window given in its line and is NA elsewhere. A scalar if/else is used so
# that each summary is exactly one value per group (a vectorised ifelse() on
# the group-length `byear` would return one value per row).
episodes = swed %>%
  filter(!is.na(male) | !is.na(survive1y) | !is.na(ever_married)) %>%
  group_by(byear) %>%
  summarise(
    "Population size" = as.numeric(n()),
    "0. Children " = if (born_between(byear, 1947, 1958)) cva(children) else NA_real_,
    "0. Any children" = if (born_between(byear, 1947, 1958)) cva_bin(any_children) else NA_real_,
    "1. Surviving first year" = if (born_between(byear, 1962, 1990)) cva_bin(survive1y) else NA_real_,
    # Conditional on having survived the first year.
    "2. Surviving to 15" = if (born_between(byear, 1962, 1977)) cva_bin(surviveR[survive1y == TRUE]) else NA_real_,
    # Conditional on surviveR == 1.
    "3. Ever married" = if (born_between(byear, 1947, 1960)) cva_bin(ever_married[surviveR == 1]) else NA_real_,
    # Conditional on having ever married.
    "4. Children" = if (born_between(byear, 1947, 1958)) cva(children[ever_married == 1]) else NA_real_,
    # Conditional on having at least one child; 1947 cohort only.
    "5. Grandchildren" = if (born_between(byear, 1947, 1947)) cva(grandchildren[children > 0]) else NA_real_
  ) %>%
  setDT()

# Show the table ordered by birth year.
data.frame(episodes[order(byear), ])
| byear | Population size | 0. Children | 0. Any children | 1. Surviving first year | 2. Surviving to 15 | 3. Ever married | 4. Children | 5. Grandchildren |
|--:|--:|--:|--:|--:|--:|--:|--:|--:|
| 1932 | 27052 | NA | NA | NA | NA | NA | NA | NA |
| 1933 | 49286 | NA | NA | NA | NA | NA | NA | NA |
| 1934 | 59173 | NA | NA | NA | NA | NA | NA | NA |
| 1935 | 63903 | NA | NA | NA | NA | NA | NA | NA |
| 1936 | 68781 | NA | NA | NA | NA | NA | NA | NA |
| 1937 | 71236 | NA | NA | NA | NA | NA | NA | NA |
| 1938 | 75451 | NA | NA | NA | NA | NA | NA | NA |
| 1939 | 79599 | NA | NA | NA | NA | NA | NA | NA |
| 1940 | 79772 | NA | NA | NA | NA | NA | NA | NA |
| 1941 | 85605 | NA | NA | NA | NA | NA | NA | NA |
| 1942 | 1e+05 | NA | NA | NA | NA | NA | NA | NA |
| 1943 | 110883 | NA | NA | NA | NA | NA | NA | NA |
| 1944 | 119153 | NA | NA | NA | NA | NA | NA | NA |
| 1945 | 120772 | NA | NA | NA | NA | NA | NA | NA |
| 1946 | 121667 | NA | NA | NA | NA | NA | NA | NA |
| 1947 | 119906 | 0.6579 | 0.4644 | NA | NA | 0.6918 | 0.5278 | 0.9062 |
| 1948 | 118922 | 0.6627 | 0.4688 | NA | NA | 0.725 | 0.5268 | NA |
| 1949 | 114845 | 0.6716 | 0.4771 | NA | NA | 0.7657 | 0.5304 | NA |
| 1950 | 110148 | 0.6752 | 0.4838 | NA | NA | 0.8218 | 0.5286 | NA |
| 1951 | 105577 | 0.6889 | 0.4947 | NA | NA | 0.895 | 0.5336 | NA |
| 1952 | 106774 | 0.6898 | 0.5005 | NA | NA | 0.9525 | 0.5279 | NA |
| 1953 | 107538 | 0.6951 | 0.505 | NA | NA | 1.001 | 0.5331 | NA |
| 1954 | 103736 | 0.6987 | 0.5119 | NA | NA | 1.061 | 0.5286 | NA |
| 1955 | 106383 | 0.7038 | 0.5179 | NA | NA | 1.141 | 0.5314 | NA |
| 1956 | 107608 | 0.7085 | 0.5244 | NA | NA | 1.172 | 0.5326 | NA |
| 1957 | 107128 | 0.7086 | 0.5274 | NA | NA | 1.239 | 0.5285 | NA |
| 1958 | 105759 | 0.7099 | 0.5305 | NA | NA | 1.286 | 0.5296 | NA |
| 1959 | 106062 | NA | NA | NA | NA | 1.334 | NA | NA |
| 1960 | 103898 | NA | NA | NA | NA | 1.44 | NA | NA |
| 1961 | 108905 | NA | NA | NA | NA | NA | NA | NA |
| 1962 | 111569 | NA | NA | 0.1116 | 0.06899 | NA | NA | NA |
| 1963 | 117652 | NA | NA | 0.1091 | 0.06994 | NA | NA | NA |
| 1964 | 127455 | NA | NA | 0.1066 | 0.06633 | NA | NA | NA |
| 1965 | 127698 | NA | NA | 0.1023 | 0.06465 | NA | NA | NA |
| 1966 | 127726 | NA | NA | 0.0999 | 0.06332 | NA | NA | NA |
| 1967 | 126358 | NA | NA | 0.1007 | 0.06174 | NA | NA | NA |
| 1968 | 118625 | NA | NA | 0.1019 | 0.06217 | NA | NA | NA |
| 1969 | 112567 | NA | NA | 0.09371 | 0.05815 | NA | NA | NA |
| 1970 | 114364 | NA | NA | 0.09316 | 0.05753 | NA | NA | NA |
| 1971 | 118423 | NA | NA | 0.09247 | 0.05584 | NA | NA | NA |
| 1972 | 117025 | NA | NA | 0.08945 | 0.05647 | NA | NA | NA |
| 1973 | 114983 | NA | NA | 0.08506 | 0.05702 | NA | NA | NA |
| 1974 | 115800 | NA | NA | 0.0863 | 0.05629 | NA | NA | NA |
| 1975 | 109598 | NA | NA | 0.08018 | 0.0523 | NA | NA | NA |
| 1976 | 104096 | NA | NA | 0.08024 | 0.05303 | NA | NA | NA |
| 1977 | 102021 | NA | NA | 0.07756 | 0.0507 | NA | NA | NA |
| 1978 | 99404 | NA | NA | 0.07648 | NA | NA | NA | NA |
| 1979 | 102788 | NA | NA | 0.07624 | NA | NA | NA | NA |
| 1980 | 104171 | NA | NA | 0.07411 | NA | NA | NA | NA |
| 1981 | 101478 | NA | NA | 0.07301 | NA | NA | NA | NA |
| 1982 | 101023 | NA | NA | 0.07214 | NA | NA | NA | NA |
| 1983 | 100255 | NA | NA | 0.07595 | NA | NA | NA | NA |
| 1984 | 102913 | NA | NA | 0.0745 | NA | NA | NA | NA |
| 1985 | 107518 | NA | NA | 0.07345 | NA | NA | NA | NA |
| 1986 | 110995 | NA | NA | 0.06894 | NA | NA | NA | NA |
| 1987 | 113599 | NA | NA | 0.07114 | NA | NA | NA | NA |
| 1988 | 121462 | NA | NA | 0.06836 | NA | NA | NA | NA |
| 1989 | 125042 | NA | NA | 0.06856 | NA | NA | NA | NA |
| 1990 | 132883 | NA | NA | 0.07006 | NA | NA | NA | NA |
| 1991 | 131987 | NA | NA | NA | NA | NA | NA | NA |
| 1992 | 129699 | NA | NA | NA | NA | NA | NA | NA |
| 1993 | 123604 | NA | NA | NA | NA | NA | NA | NA |
| 1994 | 118708 | NA | NA | NA | NA | NA | NA | NA |
| 1995 | 109684 | NA | NA | NA | NA | NA | NA | NA |
| 1996 | 101571 | NA | NA | NA | NA | NA | NA | NA |
| 1997 | 96803 | NA | NA | NA | NA | NA | NA | NA |
| 1998 | 95550 | NA | NA | NA | NA | NA | NA | NA |
| 1999 | 94648 | NA | NA | NA | NA | NA | NA | NA |
| 2000 | 96816 | NA | NA | NA | NA | NA | NA | NA |
| 2001 | 96994 | NA | NA | NA | NA | NA | NA | NA |
| 2002 | 101182 | NA | NA | NA | NA | NA | NA | NA |
| 2003 | 103929 | NA | NA | NA | NA | NA | NA | NA |
| 2004 | 105677 | NA | NA | NA | NA | NA | NA | NA |
| 2005 | 105620 | NA | NA | NA | NA | NA | NA | NA |
| 2006 | 109343 | NA | NA | NA | NA | NA | NA | NA |
| 2007 | 109601 | NA | NA | NA | NA | NA | NA | NA |
| 2008 | 109549 | NA | NA | NA | NA | NA | NA | NA |
| 2009 | 105981 | NA | NA | NA | NA | NA | NA | NA |
# Store the per-cohort table; create the output directory first so save()
# does not fail on a fresh checkout (a no-op when it already exists).
dir.create("coefs", showWarnings = FALSE)
save(episodes, file = "coefs/swed_episodes.rdata")

# One panel per statistic with its own y scale; NA cells are dropped when
# melting to long format. The plot is stored WITHOUT the mymin theme -- the
# theme is only added to the copy that is printed.
# NOTE(review): a statistic with a single non-NA cohort cannot be drawn as a
# line, which is what triggers ggplot2's "only one observation" message.
episodes.plot = ggplot(melt(episodes, id.vars = c('byear'), na.rm = TRUE)) +
  geom_line(aes(x = byear, y = value)) +
  facet_wrap(~ variable, scales = 'free_y', ncol = 1)
episodes.plot + mymin
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?