Fertility and Kin - Descriptives

knitr::opts_chunk$set(warning =  F, message = F)

load and prepare data

source("0_helpers.R")
library(effsize)
library(tidyverse)

all_surveys <- readRDS("persons.rds")
diary <- readRDS("diary_persons.rds")

## only participants fullfilling inclusion criteria and not living with parents 
all_surveys_included = all_surveys %>% filter(reasons_for_exclusion == "" & abode_flat_share  != 2)
hc = all_surveys_included %>% filter(hormonal_contraception == T)
nc = all_surveys_included %>% filter(hormonal_contraception == F)

descriptives

distribution

## nc vs hc
xtabs(~ hormonal_contraception, all_surveys_included)

## hormonal_contraception
## FALSE  TRUE 
##   539   255

## single vs relationship
xtabs(~ relationship_status == 1, all_surveys_included)

## relationship_status == 1
## FALSE  TRUE 
##   531   263

## relationship status + contraception
xtabs(~ (relationship_status == 1) + hormonal_contraception, all_surveys_included)

##                         hormonal_contraception
## relationship_status == 1 FALSE TRUE
##                    FALSE   336  195
##                    TRUE    203   60

## percentage hc vs nc
round(prop.table(xtabs(~ hormonal_contraception, all_surveys_included))*100 ,2)

## hormonal_contraception
## FALSE  TRUE 
## 67.88 32.12

NC

all_surveys_included %>% filter(hormonal_contraception == F) %>% select(age, religiosity, first_time, menstruation_length, number_sexual_partner, menarche, bfi_agree, bfi_open, bfi_neuro, bfi_extra, bfi_consc) %>%
  gather(variable, value) %>%
  group_by(variable) %>%
  filter(!is.na(value)) %>%
  summarise(mean = mean(value), sd = sd(value), 
            median = median(value),
            min = min(value), max  = max(value)) %>%
  data.frame()

##                 variable   mean      sd median    min max
## 1                    age 26.525  5.8479 26.000 18.000  49
## 2              bfi_agree  3.667  0.5903  3.667  1.444   5
## 3              bfi_consc  3.484  0.6544  3.556  1.667   5
## 4              bfi_extra  3.416  0.7615  3.500  1.375   5
## 5              bfi_neuro  2.980  0.7648  3.000  1.000   5
## 6               bfi_open  3.823  0.6183  3.900  1.500   5
## 7             first_time 17.062  2.8260 17.000  0.000  29
## 8               menarche 12.761  1.3797 13.000  8.000  18
## 9    menstruation_length 28.987  3.1502 28.000 19.000  50
## 10 number_sexual_partner  8.948 10.9999  6.000  0.000 100
## 11           religiosity  2.208  1.3279  2.000  1.000   6

HC

all_surveys_included %>% filter(hormonal_contraception == T) %>% select(age, religiosity, first_time, menstruation_length, number_sexual_partner, menarche, bfi_agree, bfi_open, bfi_neuro, bfi_extra, bfi_consc) %>%
  gather(variable, value) %>%
  group_by(variable) %>%
  filter(!is.na(value)) %>%
  dplyr::summarise(mean = mean(value), sd = sd(value), 
            median = median(value),
            min = min(value), max  = max(value)) %>%
  data.frame()

##                 variable   mean     sd median    min     max
## 1                    age 23.973 4.5778 23.000 18.000  46.000
## 2              bfi_agree  3.737 0.6158  3.778  1.889   4.889
## 3              bfi_consc  3.630 0.6900  3.667  1.667   5.000
## 4              bfi_extra  3.487 0.8091  3.500  1.125   5.000
## 5              bfi_neuro  2.931 0.7676  2.875  1.000   4.625
## 6               bfi_open  3.733 0.6073  3.700  1.700   4.900
## 7             first_time 16.755 2.6479 17.000  0.000  32.000
## 8               menarche 12.749 1.2642 13.000  9.000  18.000
## 9    menstruation_length 27.643 2.3110 28.000 15.000  36.000
## 10 number_sexual_partner  6.204 9.1362  3.000  0.000 110.000
## 11           religiosity  2.157 1.3395  2.000  1.000   6.000

single women

NC

all_surveys_included %>% filter(hormonal_contraception == F & relationship_status == 1) %>% select(age, religiosity, first_time, menstruation_length, number_sexual_partner, menarche, bfi_agree, bfi_open, bfi_neuro, bfi_extra, bfi_consc) %>%
  gather(variable, value) %>%
  group_by(variable) %>%
  filter(!is.na(value)) %>%
  dplyr::summarise(mean = mean(value), sd = sd(value), 
            median = median(value),
            min = min(value), max  = max(value)) %>%
  data.frame()

##                 variable   mean      sd median    min     max
## 1                    age 24.399  4.8730 23.000 18.000  43.000
## 2              bfi_agree  3.696  0.5570  3.778  2.111   4.889
## 3              bfi_consc  3.381  0.6295  3.333  1.778   4.778
## 4              bfi_extra  3.461  0.7201  3.500  1.375   4.875
## 5              bfi_neuro  2.950  0.7800  3.000  1.125   5.000
## 6               bfi_open  3.857  0.6034  3.900  2.300   5.000
## 7             first_time 17.533  2.6300 17.000 13.000  27.000
## 8               menarche 12.645  1.3688 13.000  8.000  16.000
## 9    menstruation_length 28.862  3.1135 28.000 20.000  40.000
## 10 number_sexual_partner  8.803 13.7380  5.000  0.000 100.000
## 11           religiosity  2.123  1.2741  2.000  1.000   6.000

HC

all_surveys_included %>% filter(hormonal_contraception == T & relationship_status == 1) %>% select(age, religiosity, first_time, menstruation_length, number_sexual_partner, menarche, bfi_agree, bfi_open, bfi_neuro, bfi_extra, bfi_consc) %>%
  gather(variable, value) %>%
  group_by(variable) %>%
  filter(!is.na(value)) %>%
  dplyr::summarise(mean = mean(value), sd = sd(value), 
            median = median(value),
            min = min(value), max  = max(value)) %>%
  data.frame()

##                 variable   mean     sd median    min    max
## 1                    age 24.533 5.2866 23.500 18.000 42.000
## 2              bfi_agree  3.648 0.6010  3.667  2.111  4.889
## 3              bfi_consc  3.580 0.6847  3.611  1.667  4.778
## 4              bfi_extra  3.542 0.8207  3.625  1.500  4.875
## 5              bfi_neuro  2.894 0.7821  2.875  1.250  4.500
## 6               bfi_open  3.802 0.6816  3.800  1.700  4.900
## 7             first_time 16.793 2.2302 17.000 13.000 26.000
## 8               menarche 12.850 1.2599 13.000 10.000 16.000
## 9    menstruation_length 27.317 3.2703 28.000 15.000 35.000
## 10 number_sexual_partner  8.017 8.4462  5.000  0.000 40.000
## 11           religiosity  1.817 1.0813  1.000  1.000  5.000

compare groups

age

t.test(hc$age, nc$age)


    Welch Two Sample t-test

data:  hc$age and nc$age
t = -6.7, df = 622, p-value = 5e-11
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -3.302 -1.803
sample estimates:
mean of x mean of y 
    23.97     26.53

cohen.d(hc$age, nc$age, hedges.correction = T, pooled = F)

## 
## Hedges's g
## 
## g estimate: -0.4361 (small)
## 95 percent confidence interval:
##   lower   upper 
## -0.5867 -0.2855

religiosity

t.test(hc$religiosity, nc$religiosity)


    Welch Two Sample t-test

data:  hc$religiosity and nc$religiosity
t = -0.5, df = 495, p-value = 0.6
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.2504  0.1485
sample estimates:
mean of x mean of y 
    2.157     2.208

cohen.d(hc$religiosity, nc$religiosity, hedges.correction = T, pooled = F)

## 
## Hedges's g
## 
## g estimate: -0.03832 (negligible)
## 95 percent confidence interval:
##   lower   upper 
## -0.1874  0.1108

first time

t.test(hc$first_time, nc$first_time)


    Welch Two Sample t-test

data:  hc$first_time and nc$first_time
t = -1.5, df = 537, p-value = 0.1
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.7179  0.1035
sample estimates:
mean of x mean of y 
    16.75     17.06

cohen.d(hc$first_time, nc$first_time, hedges.correction = T, pooled = F, na.rm =T)

## 
## Hedges's g
## 
## g estimate: -0.1086 (negligible)
## 95 percent confidence interval:
##    lower    upper 
## -0.26005  0.04287

menarche

t.test(hc$menarche, nc$menarche)


    Welch Two Sample t-test

data:  hc$menarche and nc$menarche
t = -0.12, df = 540, p-value = 0.9
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.2061  0.1828
sample estimates:
mean of x mean of y 
    12.75     12.76

cohen.d(hc$menarche, nc$menarche, hedges.correction = T, pooled = F)

## 
## Hedges's g
## 
## g estimate: -0.008435 (negligible)
## 95 percent confidence interval:
##   lower   upper 
## -0.1575  0.1406

cycle length

t.test(hc$menstruation_length, nc$menstruation_length)


    Welch Two Sample t-test

data:  hc$menstruation_length and nc$menstruation_length
t = -6.8, df = 657, p-value = 3e-11
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.7334 -0.9543
sample estimates:
mean of x mean of y 
    27.64     28.99

cohen.d(hc$menstruation_length, nc$menstruation_length, hedges.correction = T, pooled = F)

## 
## Hedges's g
## 
## g estimate: -0.4262 (small)
## 95 percent confidence interval:
##   lower   upper 
## -0.5767 -0.2757

nr sex partner

t.test(hc$number_sexual_partner, nc$number_sexual_partner)


    Welch Two Sample t-test

data:  hc$number_sexual_partner and nc$number_sexual_partner
t = -3.7, df = 591, p-value = 0.0002
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -4.203 -1.285
sample estimates:
mean of x mean of y 
    6.204     8.948

cohen.d(hc$number_sexual_partner, nc$number_sexual_partner, hedges.correction = T, pooled = F)

## 
## Hedges's g
## 
## g estimate: -0.2492 (small)
## 95 percent confidence interval:
##    lower    upper 
## -0.39879 -0.09967

extraversion

t.test(hc$bfi_extra, nc$bfi_extra)


    Welch Two Sample t-test

data:  hc$bfi_extra and nc$bfi_extra
t = 1.2, df = 472, p-value = 0.2
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.04766  0.18955
sample estimates:
mean of x mean of y 
    3.487     3.416

cohen.d(hc$bfi_extra, nc$bfi_extra, hedges.correction = T, pooled = F)

## 
## Hedges's g
## 
## g estimate: 0.09308 (negligible)
## 95 percent confidence interval:
##    lower    upper 
## -0.05605  0.24220

agreeableness

t.test(hc$bfi_agree, nc$bfi_agree)


    Welch Two Sample t-test

data:  hc$bfi_agree and nc$bfi_agree
t = 1.5, df = 480, p-value = 0.1
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.02018  0.16135
sample estimates:
mean of x mean of y 
    3.737     3.667

cohen.d(hc$bfi_agree, nc$bfi_agree, hedges.correction = T, pooled = F)

## 
## Hedges's g
## 
## g estimate: 0.1195 (negligible)
## 95 percent confidence interval:
##   lower   upper 
## -0.0297  0.2686

neuroticism

t.test(hc$bfi_neuro, nc$bfi_neuro)


    Welch Two Sample t-test

data:  hc$bfi_neuro and nc$bfi_neuro
t = -0.84, df = 497, p-value = 0.4
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.16318  0.06582
sample estimates:
mean of x mean of y 
    2.931     2.980

cohen.d(hc$bfi_neuro, nc$bfi_neuro, hedges.correction = T, pooled = F)

## 
## Hedges's g
## 
## g estimate: -0.06359 (negligible)
## 95 percent confidence interval:
##   lower   upper 
## -0.2127  0.0855

conscientiousness

t.test(hc$bfi_consc, nc$bfi_consc)


    Welch Two Sample t-test

data:  hc$bfi_consc and nc$bfi_consc
t = 2.8, df = 476, p-value = 0.005
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 0.04403 0.24677
sample estimates:
mean of x mean of y 
    3.630     3.484

cohen.d(hc$bfi_consc, nc$bfi_consc, hedges.correction = T, pooled = F)

## 
## Hedges's g
## 
## g estimate: 0.222 (small)
## 95 percent confidence interval:
##   lower   upper 
## 0.07251 0.37142

openness

t.test(hc$bfi_open, nc$bfi_open)


    Welch Two Sample t-test

data:  hc$bfi_open and nc$bfi_open
t = -1.9, df = 507, p-value = 0.05
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.181260  0.001173
sample estimates:
mean of x mean of y 
    3.733     3.823

cohen.d(hc$bfi_open, nc$bfi_open, hedges.correction = T, pooled = F)

## 
## Hedges's g
## 
## g estimate: -0.1455 (negligible)
## 95 percent confidence interval:
##     lower     upper 
## -0.294730  0.003724

Time spent with whom

diary %>% filter(reasons_for_exclusion == "") %>% 
  select(time_family, person_is_related_man, person_is_related_woman) %>% cor(use = 'p') %>% round(2)

##                         time_family person_is_related_man person_is_related_woman
## time_family                    1.00                  0.21                    0.24
## person_is_related_man          0.21                  1.00                    0.40
## person_is_related_woman        0.24                  0.40                    1.00

diary %>% filter(reasons_for_exclusion == "") %>% 
  xtabs(~ time_family + I(person_is_related_man>0) + I(person_is_related_woman>0), .)

## , , I(person_is_related_woman > 0) = FALSE
## 
##            I(person_is_related_man > 0)
## time_family FALSE TRUE
##           0  1643   25
##           1  1959   24
##           2  2158   39
##           3  1049   44
##           4   478   50
## 
## , , I(person_is_related_woman > 0) = TRUE
## 
##            I(person_is_related_man > 0)
## time_family FALSE TRUE
##           0    51   19
##           1   101   30
##           2   206   58
##           3   182   85
##           4   172  139

Exclusion reasons

Between subjects

exclusion_reasons <- all_surveys %>% 
  mutate(reasons_for_exclusion = str_sub(reasons_for_exclusion, 1, -3)) %>% 
  select(session, reasons_for_exclusion) %>% 
  drop_na(session) %>% 
  comma_separated_to_columns(reasons_for_exclusion)

exclusion_reasons_table <- exclusion_reasons %>% 
  select(-session) %>% 
  summarise_all(sum) %>% 
  sort() %>% 
  gather(reason, n) %>% 
  left_join(diary %>% mutate(reason = str_sub(reasons_for_exclusion, 1, -3)) %>% group_by(reason) %>% summarise(unique = n())) %>% 
  mutate(unique = if_else(is.na(unique), 0L, unique)) %>% 
  left_join(exclusion_reasons %>% 
  gather(reason, n, -session) %>% 
  filter(n > 0) %>% 
  distinct(session, reason, n) %>% 
  group_by(reason) %>%
  summarise(n_women = sum(n)))

library(UpSetR)
exclusion_reasons %>% 
  filter(included == 0) %>% 
  select(-included) %>% 
  as.data.frame() %>% 
  {
  upset(., ncol(.), 20, show.numbers = TRUE, order.by = "freq",
      main.bar.color = "#6E8691",
      matrix.color = "#6E8691",
      sets.bar.color = "#53AC9B")
  }

exclusion_reasons_table %>% 
  knitr::kable(caption = "Reasons for exclusion. _n_ shows the number of affected women, _unique_ those who for whom this was the only reason to be excluded.")

Reasons for exclusion. n shows the number of affected women, *unique* those who for whom this was the only reason to be excluded.
reason	n	unique	n_women
sex_hormones	7	123	7
non_heterosexual_relationship	9	0	9
switched_contraception	18	135	18
pregnant	23	0	23
not_heterosexual_female	26	175	26
breast_feeding	28	0	28
older_than_50	35	0	35
menopausal	41	0	41
diary_days_not_usable	45	0	45
didnt_finish_demographics	53	0	53
didnt_do_diary	66	0	66
fertility_never_estimable	143	0	143
living_with_parents	145	1476	145
didnt_finish_personality	196	0	196
no_regular_menstruation	302	2349	302
included	794	0	794

Diary

exclusion_reasons_diary <- diary %>% 
  mutate(reasons_for_exclusion = str_sub(reasons_for_exclusion, 1, -3)) %>% 
  select(session, created_date, reasons_for_exclusion) %>% 
  drop_na(session, created_date) %>% 
  comma_separated_to_columns(reasons_for_exclusion) %>% 
  select( -created_date)

exclusion_reasons_diary_table <- exclusion_reasons_diary %>% 
  select(-session) %>% 
  summarise_all(sum) %>% 
  sort() %>% 
  gather(reason, n) %>% 
  left_join(diary %>% mutate(reason = str_sub(reasons_for_exclusion, 1, -3)) %>% group_by(reason) %>% summarise(unique = n())) %>% 
  mutate(unique = if_else(is.na(unique), 0L, unique)) %>% 
  left_join(exclusion_reasons_diary %>% 
  gather(reason, n, -session) %>% 
  filter(n > 0) %>% 
  distinct(session, reason, n) %>% 
  group_by(reason) %>%
  summarise(n_women = sum(n)))

library(UpSetR)
exclusion_reasons_diary %>% 
  filter(included == 0) %>% 
  select(-included) %>% 
  as.data.frame() %>% 
  {
  upset(., ncol(.), 20, show.numbers = TRUE, order.by = "freq",
      main.bar.color = "#6E8691",
      matrix.color = "#6E8691",
      sets.bar.color = "#53AC9B")
  }

exclusion_reasons_diary_table %>% 
  knitr::kable(caption = "Reasons for exclusion. _n_ shows the number of affected women, _unique_ those who for whom this was the only reason to be excluded.")

Reasons for exclusion. n shows the number of affected women, *unique* those who for whom this was the only reason to be excluded.
reason	n	unique	n_women
dishonest_answer	150	60	86
didnt_do_diary	187	0	28
non_heterosexual_relationship	338	0	8
sex_hormones	442	123	7
did_not_finish_entry	856	123	542
not_heterosexual_female	896	175	16
pregnant	1047	0	19
breast_feeding	1137	0	22
switched_contraception	1178	135	18
older_than_50	1560	0	28
menopausal	2166	0	39
diary_days_not_usable	2361	0	45
fertility_never_estimable	2527	0	143
next_menstrual_onset_unobserved	4100	575	207
cycle_shorter_than_20	4736	830	476
living_with_parents	6587	1476	123
cycle_longer_than_40	7461	536	222
included	12251	0	263
no_regular_menstruation	13237	2349	245
skipped_diary_entry	15321	1960	1245
not_single	51899	22659	922

Descriptives

Julie Driebe & Ruben Arslan

20 12 2020

Fertility and Kin - Descriptives

load and prepare data

descriptives

distribution

NC

HC

single women

NC

HC

compare groups

age

religiosity

first time

menarche

cycle length

nr sex partner

extraversion

agreeableness

neuroticism

conscientiousness

openness

Time spent with whom

Exclusion reasons

Between subjects

Diary