Fertility and Kin - Descriptives

knitr::opts_chunk$set(warning =  F, message = F)

load and prepare data

source("0_helpers.R")
library(effsize)
library(tidyverse)

all_surveys <- readRDS("persons.rds")
diary <- readRDS("diary_persons.rds")

## only participants fullfilling inclusion criteria and not living with parents 
all_surveys_included = all_surveys %>% filter(reasons_for_exclusion == "" & abode_flat_share  != 2)
hc = all_surveys_included %>% filter(hormonal_contraception == T)
nc = all_surveys_included %>% filter(hormonal_contraception == F)

descriptives

distribution

## nc vs hc
xtabs(~ hormonal_contraception, all_surveys_included)
## hormonal_contraception
## FALSE  TRUE 
##   539   255
## single vs relationship
xtabs(~ relationship_status == 1, all_surveys_included)
## relationship_status == 1
## FALSE  TRUE 
##   531   263
## relationship status + contraception
xtabs(~ (relationship_status == 1) + hormonal_contraception, all_surveys_included)
##                         hormonal_contraception
## relationship_status == 1 FALSE TRUE
##                    FALSE   336  195
##                    TRUE    203   60
## percentage hc vs nc
round(prop.table(xtabs(~ hormonal_contraception, all_surveys_included))*100 ,2)
## hormonal_contraception
## FALSE  TRUE 
## 67.88 32.12

NC

all_surveys_included %>% filter(hormonal_contraception == F) %>% select(age, religiosity, first_time, menstruation_length, number_sexual_partner, menarche, bfi_agree, bfi_open, bfi_neuro, bfi_extra, bfi_consc) %>%
  gather(variable, value) %>%
  group_by(variable) %>%
  filter(!is.na(value)) %>%
  summarise(mean = mean(value), sd = sd(value), 
            median = median(value),
            min = min(value), max  = max(value)) %>%
  data.frame()
##                 variable   mean      sd median    min max
## 1                    age 26.525  5.8479 26.000 18.000  49
## 2              bfi_agree  3.667  0.5903  3.667  1.444   5
## 3              bfi_consc  3.484  0.6544  3.556  1.667   5
## 4              bfi_extra  3.416  0.7615  3.500  1.375   5
## 5              bfi_neuro  2.980  0.7648  3.000  1.000   5
## 6               bfi_open  3.823  0.6183  3.900  1.500   5
## 7             first_time 17.062  2.8260 17.000  0.000  29
## 8               menarche 12.761  1.3797 13.000  8.000  18
## 9    menstruation_length 28.987  3.1502 28.000 19.000  50
## 10 number_sexual_partner  8.948 10.9999  6.000  0.000 100
## 11           religiosity  2.208  1.3279  2.000  1.000   6

HC

all_surveys_included %>% filter(hormonal_contraception == T) %>% select(age, religiosity, first_time, menstruation_length, number_sexual_partner, menarche, bfi_agree, bfi_open, bfi_neuro, bfi_extra, bfi_consc) %>%
  gather(variable, value) %>%
  group_by(variable) %>%
  filter(!is.na(value)) %>%
  dplyr::summarise(mean = mean(value), sd = sd(value), 
            median = median(value),
            min = min(value), max  = max(value)) %>%
  data.frame()
##                 variable   mean     sd median    min     max
## 1                    age 23.973 4.5778 23.000 18.000  46.000
## 2              bfi_agree  3.737 0.6158  3.778  1.889   4.889
## 3              bfi_consc  3.630 0.6900  3.667  1.667   5.000
## 4              bfi_extra  3.487 0.8091  3.500  1.125   5.000
## 5              bfi_neuro  2.931 0.7676  2.875  1.000   4.625
## 6               bfi_open  3.733 0.6073  3.700  1.700   4.900
## 7             first_time 16.755 2.6479 17.000  0.000  32.000
## 8               menarche 12.749 1.2642 13.000  9.000  18.000
## 9    menstruation_length 27.643 2.3110 28.000 15.000  36.000
## 10 number_sexual_partner  6.204 9.1362  3.000  0.000 110.000
## 11           religiosity  2.157 1.3395  2.000  1.000   6.000

single women

NC

all_surveys_included %>% filter(hormonal_contraception == F & relationship_status == 1) %>% select(age, religiosity, first_time, menstruation_length, number_sexual_partner, menarche, bfi_agree, bfi_open, bfi_neuro, bfi_extra, bfi_consc) %>%
  gather(variable, value) %>%
  group_by(variable) %>%
  filter(!is.na(value)) %>%
  dplyr::summarise(mean = mean(value), sd = sd(value), 
            median = median(value),
            min = min(value), max  = max(value)) %>%
  data.frame()
##                 variable   mean      sd median    min     max
## 1                    age 24.399  4.8730 23.000 18.000  43.000
## 2              bfi_agree  3.696  0.5570  3.778  2.111   4.889
## 3              bfi_consc  3.381  0.6295  3.333  1.778   4.778
## 4              bfi_extra  3.461  0.7201  3.500  1.375   4.875
## 5              bfi_neuro  2.950  0.7800  3.000  1.125   5.000
## 6               bfi_open  3.857  0.6034  3.900  2.300   5.000
## 7             first_time 17.533  2.6300 17.000 13.000  27.000
## 8               menarche 12.645  1.3688 13.000  8.000  16.000
## 9    menstruation_length 28.862  3.1135 28.000 20.000  40.000
## 10 number_sexual_partner  8.803 13.7380  5.000  0.000 100.000
## 11           religiosity  2.123  1.2741  2.000  1.000   6.000

HC

all_surveys_included %>% filter(hormonal_contraception == T & relationship_status == 1) %>% select(age, religiosity, first_time, menstruation_length, number_sexual_partner, menarche, bfi_agree, bfi_open, bfi_neuro, bfi_extra, bfi_consc) %>%
  gather(variable, value) %>%
  group_by(variable) %>%
  filter(!is.na(value)) %>%
  dplyr::summarise(mean = mean(value), sd = sd(value), 
            median = median(value),
            min = min(value), max  = max(value)) %>%
  data.frame()
##                 variable   mean     sd median    min    max
## 1                    age 24.533 5.2866 23.500 18.000 42.000
## 2              bfi_agree  3.648 0.6010  3.667  2.111  4.889
## 3              bfi_consc  3.580 0.6847  3.611  1.667  4.778
## 4              bfi_extra  3.542 0.8207  3.625  1.500  4.875
## 5              bfi_neuro  2.894 0.7821  2.875  1.250  4.500
## 6               bfi_open  3.802 0.6816  3.800  1.700  4.900
## 7             first_time 16.793 2.2302 17.000 13.000 26.000
## 8               menarche 12.850 1.2599 13.000 10.000 16.000
## 9    menstruation_length 27.317 3.2703 28.000 15.000 35.000
## 10 number_sexual_partner  8.017 8.4462  5.000  0.000 40.000
## 11           religiosity  1.817 1.0813  1.000  1.000  5.000

compare groups

age

t.test(hc$age, nc$age)

    Welch Two Sample t-test

data:  hc$age and nc$age
t = -6.7, df = 622, p-value = 5e-11
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -3.302 -1.803
sample estimates:
mean of x mean of y 
    23.97     26.53 
cohen.d(hc$age, nc$age, hedges.correction = T, pooled = F)
## 
## Hedges's g
## 
## g estimate: -0.4361 (small)
## 95 percent confidence interval:
##   lower   upper 
## -0.5867 -0.2855

religiosity

t.test(hc$religiosity, nc$religiosity)

    Welch Two Sample t-test

data:  hc$religiosity and nc$religiosity
t = -0.5, df = 495, p-value = 0.6
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.2504  0.1485
sample estimates:
mean of x mean of y 
    2.157     2.208 
cohen.d(hc$religiosity, nc$religiosity, hedges.correction = T, pooled = F)
## 
## Hedges's g
## 
## g estimate: -0.03832 (negligible)
## 95 percent confidence interval:
##   lower   upper 
## -0.1874  0.1108

first time

t.test(hc$first_time, nc$first_time)

    Welch Two Sample t-test

data:  hc$first_time and nc$first_time
t = -1.5, df = 537, p-value = 0.1
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.7179  0.1035
sample estimates:
mean of x mean of y 
    16.75     17.06 
cohen.d(hc$first_time, nc$first_time, hedges.correction = T, pooled = F, na.rm =T)
## 
## Hedges's g
## 
## g estimate: -0.1086 (negligible)
## 95 percent confidence interval:
##    lower    upper 
## -0.26005  0.04287

menarche

t.test(hc$menarche, nc$menarche)

    Welch Two Sample t-test

data:  hc$menarche and nc$menarche
t = -0.12, df = 540, p-value = 0.9
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.2061  0.1828
sample estimates:
mean of x mean of y 
    12.75     12.76 
cohen.d(hc$menarche, nc$menarche, hedges.correction = T, pooled = F)
## 
## Hedges's g
## 
## g estimate: -0.008435 (negligible)
## 95 percent confidence interval:
##   lower   upper 
## -0.1575  0.1406

cycle length

t.test(hc$menstruation_length, nc$menstruation_length)

    Welch Two Sample t-test

data:  hc$menstruation_length and nc$menstruation_length
t = -6.8, df = 657, p-value = 3e-11
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.7334 -0.9543
sample estimates:
mean of x mean of y 
    27.64     28.99 
cohen.d(hc$menstruation_length, nc$menstruation_length, hedges.correction = T, pooled = F)
## 
## Hedges's g
## 
## g estimate: -0.4262 (small)
## 95 percent confidence interval:
##   lower   upper 
## -0.5767 -0.2757

nr sex partner

t.test(hc$number_sexual_partner, nc$number_sexual_partner)

    Welch Two Sample t-test

data:  hc$number_sexual_partner and nc$number_sexual_partner
t = -3.7, df = 591, p-value = 0.0002
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -4.203 -1.285
sample estimates:
mean of x mean of y 
    6.204     8.948 
cohen.d(hc$number_sexual_partner, nc$number_sexual_partner, hedges.correction = T, pooled = F)
## 
## Hedges's g
## 
## g estimate: -0.2492 (small)
## 95 percent confidence interval:
##    lower    upper 
## -0.39879 -0.09967

extraversion

t.test(hc$bfi_extra, nc$bfi_extra)

    Welch Two Sample t-test

data:  hc$bfi_extra and nc$bfi_extra
t = 1.2, df = 472, p-value = 0.2
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.04766  0.18955
sample estimates:
mean of x mean of y 
    3.487     3.416 
cohen.d(hc$bfi_extra, nc$bfi_extra, hedges.correction = T, pooled = F)
## 
## Hedges's g
## 
## g estimate: 0.09308 (negligible)
## 95 percent confidence interval:
##    lower    upper 
## -0.05605  0.24220

agreeableness

t.test(hc$bfi_agree, nc$bfi_agree)

    Welch Two Sample t-test

data:  hc$bfi_agree and nc$bfi_agree
t = 1.5, df = 480, p-value = 0.1
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.02018  0.16135
sample estimates:
mean of x mean of y 
    3.737     3.667 
cohen.d(hc$bfi_agree, nc$bfi_agree, hedges.correction = T, pooled = F)
## 
## Hedges's g
## 
## g estimate: 0.1195 (negligible)
## 95 percent confidence interval:
##   lower   upper 
## -0.0297  0.2686

neuroticism

t.test(hc$bfi_neuro, nc$bfi_neuro)

    Welch Two Sample t-test

data:  hc$bfi_neuro and nc$bfi_neuro
t = -0.84, df = 497, p-value = 0.4
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.16318  0.06582
sample estimates:
mean of x mean of y 
    2.931     2.980 
cohen.d(hc$bfi_neuro, nc$bfi_neuro, hedges.correction = T, pooled = F)
## 
## Hedges's g
## 
## g estimate: -0.06359 (negligible)
## 95 percent confidence interval:
##   lower   upper 
## -0.2127  0.0855

conscientiousness

t.test(hc$bfi_consc, nc$bfi_consc)

    Welch Two Sample t-test

data:  hc$bfi_consc and nc$bfi_consc
t = 2.8, df = 476, p-value = 0.005
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 0.04403 0.24677
sample estimates:
mean of x mean of y 
    3.630     3.484 
cohen.d(hc$bfi_consc, nc$bfi_consc, hedges.correction = T, pooled = F)
## 
## Hedges's g
## 
## g estimate: 0.222 (small)
## 95 percent confidence interval:
##   lower   upper 
## 0.07251 0.37142

openness

t.test(hc$bfi_open, nc$bfi_open)

    Welch Two Sample t-test

data:  hc$bfi_open and nc$bfi_open
t = -1.9, df = 507, p-value = 0.05
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.181260  0.001173
sample estimates:
mean of x mean of y 
    3.733     3.823 
cohen.d(hc$bfi_open, nc$bfi_open, hedges.correction = T, pooled = F)
## 
## Hedges's g
## 
## g estimate: -0.1455 (negligible)
## 95 percent confidence interval:
##     lower     upper 
## -0.294730  0.003724

Time spent with whom

diary %>% filter(reasons_for_exclusion == "") %>% 
  select(time_family, person_is_related_man, person_is_related_woman) %>% cor(use = 'p') %>% round(2)
##                         time_family person_is_related_man person_is_related_woman
## time_family                    1.00                  0.21                    0.24
## person_is_related_man          0.21                  1.00                    0.40
## person_is_related_woman        0.24                  0.40                    1.00
diary %>% filter(reasons_for_exclusion == "") %>% 
  xtabs(~ time_family + I(person_is_related_man>0) + I(person_is_related_woman>0), .)
## , , I(person_is_related_woman > 0) = FALSE
## 
##            I(person_is_related_man > 0)
## time_family FALSE TRUE
##           0  1643   25
##           1  1959   24
##           2  2158   39
##           3  1049   44
##           4   478   50
## 
## , , I(person_is_related_woman > 0) = TRUE
## 
##            I(person_is_related_man > 0)
## time_family FALSE TRUE
##           0    51   19
##           1   101   30
##           2   206   58
##           3   182   85
##           4   172  139

Exclusion reasons

Between subjects

exclusion_reasons <- all_surveys %>% 
  mutate(reasons_for_exclusion = str_sub(reasons_for_exclusion, 1, -3)) %>% 
  select(session, reasons_for_exclusion) %>% 
  drop_na(session) %>% 
  comma_separated_to_columns(reasons_for_exclusion)

exclusion_reasons_table <- exclusion_reasons %>% 
  select(-session) %>% 
  summarise_all(sum) %>% 
  sort() %>% 
  gather(reason, n) %>% 
  left_join(diary %>% mutate(reason = str_sub(reasons_for_exclusion, 1, -3)) %>% group_by(reason) %>% summarise(unique = n())) %>% 
  mutate(unique = if_else(is.na(unique), 0L, unique)) %>% 
  left_join(exclusion_reasons %>% 
  gather(reason, n, -session) %>% 
  filter(n > 0) %>% 
  distinct(session, reason, n) %>% 
  group_by(reason) %>%
  summarise(n_women = sum(n)))

library(UpSetR)
exclusion_reasons %>% 
  filter(included == 0) %>% 
  select(-included) %>% 
  as.data.frame() %>% 
  {
  upset(., ncol(.), 20, show.numbers = TRUE, order.by = "freq",
      main.bar.color = "#6E8691",
      matrix.color = "#6E8691",
      sets.bar.color = "#53AC9B")
  }

exclusion_reasons_table %>% 
  knitr::kable(caption = "Reasons for exclusion. _n_ shows the number of affected women, _unique_ those who for whom this was the only reason to be excluded.")
Reasons for exclusion. n shows the number of affected women, unique those who for whom this was the only reason to be excluded.
reason n unique n_women
sex_hormones 7 123 7
non_heterosexual_relationship 9 0 9
switched_contraception 18 135 18
pregnant 23 0 23
not_heterosexual_female 26 175 26
breast_feeding 28 0 28
older_than_50 35 0 35
menopausal 41 0 41
diary_days_not_usable 45 0 45
didnt_finish_demographics 53 0 53
didnt_do_diary 66 0 66
fertility_never_estimable 143 0 143
living_with_parents 145 1476 145
didnt_finish_personality 196 0 196
no_regular_menstruation 302 2349 302
included 794 0 794

Diary

exclusion_reasons_diary <- diary %>% 
  mutate(reasons_for_exclusion = str_sub(reasons_for_exclusion, 1, -3)) %>% 
  select(session, created_date, reasons_for_exclusion) %>% 
  drop_na(session, created_date) %>% 
  comma_separated_to_columns(reasons_for_exclusion) %>% 
  select( -created_date)

exclusion_reasons_diary_table <- exclusion_reasons_diary %>% 
  select(-session) %>% 
  summarise_all(sum) %>% 
  sort() %>% 
  gather(reason, n) %>% 
  left_join(diary %>% mutate(reason = str_sub(reasons_for_exclusion, 1, -3)) %>% group_by(reason) %>% summarise(unique = n())) %>% 
  mutate(unique = if_else(is.na(unique), 0L, unique)) %>% 
  left_join(exclusion_reasons_diary %>% 
  gather(reason, n, -session) %>% 
  filter(n > 0) %>% 
  distinct(session, reason, n) %>% 
  group_by(reason) %>%
  summarise(n_women = sum(n)))

library(UpSetR)
exclusion_reasons_diary %>% 
  filter(included == 0) %>% 
  select(-included) %>% 
  as.data.frame() %>% 
  {
  upset(., ncol(.), 20, show.numbers = TRUE, order.by = "freq",
      main.bar.color = "#6E8691",
      matrix.color = "#6E8691",
      sets.bar.color = "#53AC9B")
  }

exclusion_reasons_diary_table %>% 
  knitr::kable(caption = "Reasons for exclusion. _n_ shows the number of affected women, _unique_ those who for whom this was the only reason to be excluded.")
Reasons for exclusion. n shows the number of affected women, unique those who for whom this was the only reason to be excluded.
reason n unique n_women
dishonest_answer 150 60 86
didnt_do_diary 187 0 28
non_heterosexual_relationship 338 0 8
sex_hormones 442 123 7
did_not_finish_entry 856 123 542
not_heterosexual_female 896 175 16
pregnant 1047 0 19
breast_feeding 1137 0 22
switched_contraception 1178 135 18
older_than_50 1560 0 28
menopausal 2166 0 39
diary_days_not_usable 2361 0 45
fertility_never_estimable 2527 0 143
next_menstrual_onset_unobserved 4100 575 207
cycle_shorter_than_20 4736 830 476
living_with_parents 6587 1476 123
cycle_longer_than_40 7461 536 222
included 12251 0 263
no_regular_menstruation 13237 2349 245
skipped_diary_entry 15321 1960 1245
not_single 51899 22659 922