library(knitr)
opts_chunk$set(tidy = FALSE,cache = F, warning = F, message = F)
## ausfuehren zeilenweise mit dem run-knopf (rechts oben in diesem fenster-segment)
## oder mit strg-enter
source("0_helpers.R")
# get credentials
source(".password.R")
# formr_connect(credentials$user, credentials$password, host = credentials$host)
# credentials = list(host = credentials$host)
theme_set(theme_tufte(base_size = 12, base_family = 'Helvetica Neue'))
# einleit = as.data.table(formr_raw_results("Einleitender_Fragebogen", host = credentials$host))
# einleit_WAMS = as.data.table(formr_raw_results("Einleitender_Fragebogen_WAMS", host = credentials$host))
# einleit = setDF(rbind.fill(einleit, einleit_WAMS))
# rm(einleit_WAMS)
# einleit_items = formr_items("Einleitender_Fragebogen_WAMS", host = credentials$host)
# einleit = as.data.table(formr_post_process_results(einleit_items, einleit, compute_alphas = T,plot_likert = T ))
#
# vorab_old = as.data.table(formr_raw_results("Vorab_Fragebogen1", host = credentials$host))
# vorab_WAMS = as.data.table(formr_raw_results("Vorab_Fragebogen1_WAMS", host = credentials$host))
# vorab_items = formr_items("Vorab_Fragebogen1_WAMS", host = credentials$host)
#
# tagebuch = as.data.table(formr_results("Taeglicher_Fragebogen_1", host = credentials$host))
#
# abschluss = as.data.table(formr_results("Abschluss_Fragebogen_1", host = credentials$host))
# nachbe = formr_results("Nachbefragung_Menstruation", host = credentials$host) %>% as.data.table()
# nachbe_other_hormonal = as.data.table(formr_results("Nachbefragung_other_hormonal", compute_alphas = T, host = credentials$host))
# test_sessions = c(einleit[duplicated(session), session], "616ccf34994fcf7fa170a49c676496a96b26e9315ee81180939fc8bd951ea315")
#
# einleit = einleit[ !session %in% test_sessions, ]
# einleit = einleit[ !is.na(agreement), ]
# stopifnot(0 == sum(duplicated(einleit$session)))
#
# vorab_old = vorab_old[ !session %in% test_sessions, ]
# vorab_WAMS = vorab_WAMS[ !session %in% test_sessions, ]
# vorab = setDF(rbind.fill(vorab_old, vorab_WAMS))
# vorab = as.data.table(formr_post_process_results(vorab_items, vorab,compute_alphas = T,plot_likert = T ))
# stopifnot(0 == sum(duplicated(vorab$session)))
# vorab[is.na(BFI_consc_8), ended := NA] # interrupters who we disabled
#
# abschluss = abschluss[ order( rev(created)) , ]
# abschluss = abschluss[ !duplicated(session), ]
# stopifnot(0 == sum(duplicated(abschluss$session)))
#
# nachbe_other_hormonal$session_id = as.numeric(nachbe_other_hormonal$session_id )
# nachbe_other_hormonal = nachbe_other_hormonal[!session_id %in% c(171983, 171984), ]
# stopifnot(0 == sum(duplicated(nachbe_other_hormonal$session)))
# theme_set(theme_tufte(base_size = 20, base_family = 'Helvetica Neue'))
# einleit = einleit %>% select(-email, -referring_website, -browser, -mobile_nr)
# save(einleit, vorab, tagebuch, abschluss,nachbe, nachbe_other_hormonal, file = 'pretty_raw.rdata')
load("pretty_raw.rdata")
p0 = merge(einleit[,list(session,age, gender, sex_orientation, hetero_relationship)], vorab, by = "session", all.x = T)
p1 <- merge(p0, abschluss, by = "session", all.x = T, suffixes = c(".vorab",".abschluss"))
names(nachbe_other_hormonal)[2:5] = paste0(names(nachbe_other_hormonal)[2:5], ".nachbe_other_hormonal")
xsection <- merge(p1, nachbe_other_hormonal, by = "session", all.x = T)
pipeline_hc = pipeline = list()
pipeline$signed_up = sum(!is.na(xsection$age))
xsection = xsection %>% filter(!is.na(age))
pipeline$ineligible = sum(xsection$gender != 1 | xsection$hetero_relationship != 1, na.rm = T)
xsection = xsection %>% filter(gender == 1 & hetero_relationship == 1)
pipeline$did_not_complete_pre_survey = sum(is.na(xsection$ended.vorab), na.rm = TRUE)
xsection = xsection %>% filter(!is.na(ended.vorab)) #only those who ended vorab
pipeline$finished_pre_survey = nrow(xsection)
tagebuch = tagebuch[ !is.na(ended) & !is.na(session) & session %in% xsection$session, ] # no duds/testers
tagebuch$created_date = as.Date(tagebuch$created - dhours(9))
tagebuch$sess_day = paste(tagebuch$session, tagebuch$created_date)
table(duplicated(tagebuch$sess_day)) # 41 dupes due to an early bug
##
## FALSE TRUE
## 30930 41
setkey(tagebuch, session, created, ended, modified)
tagebuch = tagebuch[!duplicated(tagebuch$sess_day), ]
xsection$hypothesis_guessed = NA_character_ # Hypothese erraten variable erstmal bei allen auf “unkodiert” setzen
# cat(xsection[ ! is.na(meaning_study) & is.na(hypothesis_guessed), str_c( str_sub(session, 1, 10),"\n", meaning_study,"\n\n") ]) # zeigt alle die was geschrieben haben, aber nur die bei denen wir die Hypothese noch nicht kodiert haben
## beginnend bei Nr. 2 Abschluss FB
xsection$short = str_sub(xsection$session,1,10)
xsection[ short == "c8cabb2407", hypothesis_guessed := "pille" ]
xsection[ short == "0d2804a063", hypothesis_guessed := "" ]
xsection[ short == "df96f1da5c", hypothesis_guessed := "" ]
xsection[ short == "a9995fe3d9", hypothesis_guessed := "" ]
xsection[ short == "3f776540f0", hypothesis_guessed := "zyklus, pille" ]
xsection[ short == "b726d9cd36", hypothesis_guessed := "" ]
xsection[ short == "2a656528d8", hypothesis_guessed := "" ]
xsection[ short == "1794115cf9", hypothesis_guessed := "" ]
xsection[ short == "6d97f9db84", hypothesis_guessed := "" ]
xsection[ short == "5528d129af", hypothesis_guessed := "" ]
xsection[ short == "3f776540f0", hypothesis_guessed := "zyklus, pille" ]
xsection[ short == "d323f2f23e", hypothesis_guessed := "" ]
xsection[ short == "0d2804a063", hypothesis_guessed := "" ]
xsection[ short == "b45e6029af", hypothesis_guessed := "" ]
xsection[ short == "06d8ad9352", hypothesis_guessed := "" ]
xsection[ short == "daf19d11c5", hypothesis_guessed := "" ]
xsection[ short == "ad9c9b9040", hypothesis_guessed := "" ]
xsection[ short == "602532ed06", hypothesis_guessed := "" ]
xsection[ short == "ef5d7a4afe", hypothesis_guessed := "" ]
xsection[ short == "4bb6d35c60", hypothesis_guessed := "" ]
xsection[ short == "2709df5b65", hypothesis_guessed := "" ]
xsection[ short == "1d55f16def", hypothesis_guessed := "" ]
xsection[ short == "1efa2a7b59", hypothesis_guessed := "" ]
xsection[ short == "0ce9853874", hypothesis_guessed := "" ]
xsection[ short == "e164a28749", hypothesis_guessed := "" ]
xsection[ short == "304bca7f58", hypothesis_guessed := "" ]
xsection[ short == "21054c807c", hypothesis_guessed := "" ]
xsection[ short == "35bb4b9d0e", hypothesis_guessed := "" ]
xsection[ short == "0e8fb367d5", hypothesis_guessed := "" ]
xsection[ short == "2001e6b0ba", hypothesis_guessed := "" ]
xsection[ short == "4adad13adb", hypothesis_guessed := "" ]
xsection[ short == "43273cd6f1", hypothesis_guessed := "" ]
xsection[ short == "7d15b96270", hypothesis_guessed := "zyklus" ]
xsection[ short == "129ac3ab6d", hypothesis_guessed := "" ]
xsection[ short == "f9cc25fe9f", hypothesis_guessed := "zyklus" ]
xsection[ short == "2384901dd0", hypothesis_guessed := "" ]
xsection[ short == "e13702498c", hypothesis_guessed := "" ]
xsection[ short == "df96f1da5c", hypothesis_guessed := "" ]
xsection[ short == "610e6193fc", hypothesis_guessed := "" ]
xsection[ short == "14c0d954f2", hypothesis_guessed := "" ]
xsection[ short == "09521151c3", hypothesis_guessed := "" ]
xsection[ short == "74c219c7b4", hypothesis_guessed := "" ]
xsection[ short == "8a656581da", hypothesis_guessed := "" ]
xsection[ short == "714c09c81d", hypothesis_guessed := "" ]
xsection[ short == "968add9413", hypothesis_guessed := "" ]
xsection[ short == "4b1c22b386", hypothesis_guessed := "zyklus" ]
xsection[ short == "741097daab", hypothesis_guessed := "menstruation" ]
xsection[ short == "2fff8152a1", hypothesis_guessed := "" ]
xsection[ short == "9c1c191a8c", hypothesis_guessed := "" ]
xsection[ short == "fae818546b", hypothesis_guessed := "" ]
xsection[ short == "905fa66d73", hypothesis_guessed := "" ]
xsection[ short == "a9995fe3d9", hypothesis_guessed := "zyklus" ]
xsection[ short == "9e7ca102d1", hypothesis_guessed := "" ]
xsection[ short == "c368a1d25c", hypothesis_guessed := "zyklus" ]
xsection[ short == "7f71877363", hypothesis_guessed := "" ]
xsection[ short == "354137dd7f", hypothesis_guessed := "" ]
xsection[ short == "d7c1497736", hypothesis_guessed := "" ]
xsection[ short == "5a52338d59", hypothesis_guessed := "" ]
xsection[ short == "1f21c8d01f", hypothesis_guessed := "" ]
xsection[ short == "4402465c96", hypothesis_guessed := "zyklus" ]
xsection[ short == "8de9e8403b", hypothesis_guessed := "" ]
xsection[ short == "56306eba82", hypothesis_guessed := "" ]
xsection[ short == "34015d752d", hypothesis_guessed := "" ]
xsection[ short == "935ade8964", hypothesis_guessed := "" ]
xsection[ short == "75b7a0b580", hypothesis_guessed := "" ]
xsection[ short == "7a5e6c3fe0", hypothesis_guessed := "" ]
xsection[ short == "24af1309b4", hypothesis_guessed := "" ]
xsection[ short == "6a32bf9e58", hypothesis_guessed := "" ]
xsection[ short == "c6e2a0e0f5", hypothesis_guessed := "" ]
xsection[ short == "a269d1c49b", hypothesis_guessed := "zyklus" ]
xsection[ short == "41a22ad56a", hypothesis_guessed := "" ]
xsection[ short == "2a32bc55a7", hypothesis_guessed := "zyklus" ]
xsection[ short == "7e3f2d22a8", hypothesis_guessed := "" ]
xsection[ short == "c680d6faed", hypothesis_guessed := "" ]
xsection[ short == "28a22bdb79", hypothesis_guessed := "" ]
xsection[ short == "b7313e1259", hypothesis_guessed := "" ]
xsection[ short == "76f7da03e6", hypothesis_guessed := "" ]
xsection[ short == "236a466536", hypothesis_guessed := "" ]
xsection[ short == "d8c77b5d7e", hypothesis_guessed := "" ]
xsection[ short == "998a173634", hypothesis_guessed := "" ]
xsection[ short == "8765bada78", hypothesis_guessed := "" ]
xsection[ short == "eb50f2a1b0", hypothesis_guessed := "zyklus" ]
xsection[ short == "75ca1e55f3", hypothesis_guessed := "" ]
xsection[ short == "36bf3909cd", hypothesis_guessed := "" ]
xsection[ short == "6b4c442294", hypothesis_guessed := "" ]
xsection[ short == "cfd0e370c2", hypothesis_guessed := "" ]
xsection[ short == "da9c04f1dc", hypothesis_guessed := "" ]
xsection[ short == "b15de9d994", hypothesis_guessed := "" ]
xsection[ short == "927a815661", hypothesis_guessed := "" ]
xsection[ short == "f206eb6c66", hypothesis_guessed := "" ]
xsection[ short == "a05777d688", hypothesis_guessed := "" ]
xsection[ short == "008e1a3846", hypothesis_guessed := "" ]
xsection[ short == "77bec8f9fa", hypothesis_guessed := "" ]
xsection[ short == "ffe3de7ada", hypothesis_guessed := "" ]
xsection[ short == "ce9110b32c", hypothesis_guessed := "" ]
xsection[ short == "3d236f5b13", hypothesis_guessed := "zyklus" ]
xsection[ short == "31fae1e494", hypothesis_guessed := "" ]
xsection[ short == "7d63af564f", hypothesis_guessed := "" ]
xsection[ short == "0643a31c42", hypothesis_guessed := "" ]
xsection[ short == "6a451c3559", hypothesis_guessed := "" ]
xsection[ short == "1794115cf9", hypothesis_guessed := "" ]
xsection[ short == "bde2228088", hypothesis_guessed := "" ]
xsection[ short == "7bcb402ac2", hypothesis_guessed := "" ]
xsection[ short == "56e91fba0d", hypothesis_guessed := "" ]
xsection[ short == "e4878941c8", hypothesis_guessed := "" ]
xsection[ short == "d9040996f0", hypothesis_guessed := "pille" ]
xsection[ short == "1b186d0bdf", hypothesis_guessed := "" ]
xsection[ short == "a8f5c15eab", hypothesis_guessed := "zyklus" ]
xsection[ short == "5fa900360e", hypothesis_guessed := "" ]
xsection[ short == "0659e0ce76", hypothesis_guessed := "" ]
xsection[ short == "99f35b7279", hypothesis_guessed := "" ]
xsection[ short == "245e4d3f8e", hypothesis_guessed := "zyklus" ]
xsection[ short == "bd88ffe4c2", hypothesis_guessed := "" ]
xsection[ short == "9400580d09", hypothesis_guessed := "" ]
xsection[ short == "9a38c0f7a8", hypothesis_guessed := "" ]
xsection[ short == "90e1277822", hypothesis_guessed := "" ]
xsection[ short == "02b30a1e88", hypothesis_guessed := "" ]
xsection[ short == "ae90c6f50d", hypothesis_guessed := "" ]
xsection[ short == "43a25b3770", hypothesis_guessed := "" ]
xsection[ short == "58f42542f3", hypothesis_guessed := "" ]
xsection[ short == "5770242e01", hypothesis_guessed := "" ]
xsection[ short == "fe928a062c", hypothesis_guessed := "" ]
xsection[ short == "0fcec7e9e9", hypothesis_guessed := "" ]
xsection[ short == "44f48a8b61", hypothesis_guessed := "" ]
xsection[ short == "be468466cb", hypothesis_guessed := "" ]
xsection[ short == "8de9e8403b", hypothesis_guessed := "" ]
xsection[ short == "daf776aeb5", hypothesis_guessed := "pille, zyklus" ]
xsection[ short == "c02d06c4b6", hypothesis_guessed := "pille" ]
xsection[ short == "24ed183204", hypothesis_guessed := "" ]
xsection[ short == "dd208c8a34", hypothesis_guessed := "" ]
xsection[ short == "5089861ec1", hypothesis_guessed := "" ]
xsection[ short == "6e977547c4", hypothesis_guessed := "" ]
xsection[ short == "f786b346c8", hypothesis_guessed := "" ]
xsection[ short == "96751721dd", hypothesis_guessed := "" ]
xsection[ short == "f5448770a6", hypothesis_guessed := "" ]
xsection[ short == "646c651ea4", hypothesis_guessed := "" ]
xsection[ short == "9f8b9c389a", hypothesis_guessed := "" ]
xsection[ short == "411c91350a", hypothesis_guessed := "" ]
xsection[ short == "21b5146134", hypothesis_guessed := "" ]
xsection[ short == "445c1bbf06", hypothesis_guessed := "" ]
xsection[ short == "b068b06b12", hypothesis_guessed := "" ]
xsection[ short == "ec4505ed59", hypothesis_guessed := "" ]
xsection[ short == "6a01498107", hypothesis_guessed := "" ]
xsection[ short == "f1177b77af", hypothesis_guessed := "" ]
xsection[ short == "689259c4a0", hypothesis_guessed := "" ]
xsection[ short == "c5a8fe1c2e", hypothesis_guessed := "" ]
xsection[ short == "b726d9cd36", hypothesis_guessed := "" ]
xsection[ short == "f42ae3caec", hypothesis_guessed := "" ]
xsection[ short == "cb90b67420", hypothesis_guessed := "" ]
xsection[ short == "28012aed5e", hypothesis_guessed := "" ]
xsection[ short == "9bb8c4ea35", hypothesis_guessed := "" ]
xsection[ short == "fa12f9e203", hypothesis_guessed := "" ]
xsection[ short == "64bd3dba20", hypothesis_guessed := "pille" ]
xsection[ short == "7b2fcea8a5", hypothesis_guessed := "" ]
xsection[ short == "19bca0c45f", hypothesis_guessed := "" ]
xsection[ short == "d79c17faef", hypothesis_guessed := "" ]
xsection[ short == "6d97f9db84", hypothesis_guessed := "" ]
xsection[ short == "2a656528d8", hypothesis_guessed := "" ]
xsection[ short == "82d0f82850", hypothesis_guessed := "" ]
xsection[ short == "0a28af0a59", hypothesis_guessed := "" ]
xsection[ short == "e2f049ef95", hypothesis_guessed := "" ]
xsection[ short == "28a246460d", hypothesis_guessed := "" ]
xsection[ short == "44ed3a0f94", hypothesis_guessed := "" ]
xsection[ short == "128e5f2f9a", hypothesis_guessed := "" ]
xsection[ short == "53bf4f5456", hypothesis_guessed := "" ]
xsection[ short == "9f61438d5f", hypothesis_guessed := "" ]
xsection[ short == "5530688d48", hypothesis_guessed := "" ]
xsection[ short == "cd75c19fa1", hypothesis_guessed := "" ]
xsection[ short == "59131d88d4", hypothesis_guessed := "" ]
xsection[ short == "8a891f90ce", hypothesis_guessed := "" ]
xsection[ short == "37276e25e2", hypothesis_guessed := "zyklus" ]
xsection[ short == "8ec26c9036", hypothesis_guessed := "zyklus" ]
xsection[ short == "01c277d3d0", hypothesis_guessed := "" ]
xsection[ short == "c6dd42db4b", hypothesis_guessed := "" ]
xsection[ short == "b1ec2b199e", hypothesis_guessed := "" ]
xsection[ short == "79bf667244", hypothesis_guessed := "" ]
xsection[ short == "38bb042edd", hypothesis_guessed := "" ]
xsection[ short == "1c40c3e9b3", hypothesis_guessed := "" ]
xsection[ short == "a996908c50", hypothesis_guessed := "" ]
xsection[ short == "2e7a8138ce", hypothesis_guessed := "" ]
xsection[ short == "e8142dfc42", hypothesis_guessed := "" ]
xsection[ short == "74996f8517", hypothesis_guessed := "" ]
xsection[ short == "5528d129af", hypothesis_guessed := "" ]
xsection[ short == "fe1da43c1c", hypothesis_guessed := "" ]
xsection[ short == "ad5b746840", hypothesis_guessed := "" ]
xsection[ short == "21d26d2761", hypothesis_guessed := "" ]
xsection[ short == "a902d3062e", hypothesis_guessed := "" ]
xsection[ short == "4e9893da3b", hypothesis_guessed := "" ]
xsection[ short == "220d10348e", hypothesis_guessed := "" ]
xsection[ short == "23edbdf284", hypothesis_guessed := "" ]
xsection[ short == "25199bbae1", hypothesis_guessed := "" ]
xsection[ short == "abe4c9949e", hypothesis_guessed := "" ]
# Kodierung Hanne reinkopiert
xsection[ short == "03b4587939", hypothesis_guessed := "" ]
xsection[ short == "7392ad15ff", hypothesis_guessed := "" ]
xsection[ short == "0de89e1e3c", hypothesis_guessed := "" ]
xsection[ short == "21523ad67c", hypothesis_guessed := "" ]
xsection[ short == "16d4342214", hypothesis_guessed := "" ]
xsection[ short == "b9294fe7be", hypothesis_guessed := "" ]
xsection[ short == "6d3d339b5c", hypothesis_guessed := "" ]
xsection[ short == "2ccbf1f5a6", hypothesis_guessed := "" ]
xsection[ short == "9787db1a54", hypothesis_guessed := "" ]
xsection[ short == "3b4ef0781b", hypothesis_guessed := "" ]
xsection[ short == "38cb0923c8", hypothesis_guessed := "" ]
xsection[ short == "008ee8cd6a", hypothesis_guessed := "" ]
xsection[ short == "6bf2fadeab", hypothesis_guessed := "" ]
xsection[ short == "3d58cbb50e", hypothesis_guessed := "" ]
xsection[ short == "5cb71cce8b", hypothesis_guessed := "" ]
xsection[ short == "79e1cb73fe", hypothesis_guessed := "" ]
xsection[ short == "c4b0225e39", hypothesis_guessed := "" ]
xsection[ short == "c7eb229822", hypothesis_guessed := "" ]
xsection[ short == "5817ad784e", hypothesis_guessed := "" ]
xsection[ short == "b0ef6c5149", hypothesis_guessed := "" ]
xsection[ short == "c70fa419b0", hypothesis_guessed := "" ]
xsection[ short == "1f4a0cf36f", hypothesis_guessed := "" ]
xsection[ short == "2dafb15a0a", hypothesis_guessed := "" ]
xsection[ short == "fb035fee5a", hypothesis_guessed := "" ]
xsection[ short == "7a76030651", hypothesis_guessed := "" ]
xsection[ short == "b5af1bfbf8", hypothesis_guessed := "" ]
xsection[ short == "d1ea29bce0", hypothesis_guessed := "" ]
xsection[ short == "61d9f3549e", hypothesis_guessed := "" ]
xsection[ short == "f480464fff", hypothesis_guessed := "" ]
xsection[ short == "c4c8152b39", hypothesis_guessed := "" ]
xsection[ short == "dd791f1ba9", hypothesis_guessed := "" ]
xsection[ short == "17ecc0eef7", hypothesis_guessed := "" ]
xsection[ short == "4baca29e4a", hypothesis_guessed := "" ]
xsection[ short == "c3d67296d9", hypothesis_guessed := "" ]
xsection[ short == "c78f64a555", hypothesis_guessed := "zyklus" ]
xsection[ short == "2b408b1e5b", hypothesis_guessed := "" ]
xsection[ short == "ddf84eabe6", hypothesis_guessed := "" ]
xsection[ short == "1630e644ea", hypothesis_guessed := "" ]
xsection[ short == "6944eadc16", hypothesis_guessed := "" ]
xsection[ short == "534af6e9d1", hypothesis_guessed := "" ]
xsection[ short == "f7f94664b3", hypothesis_guessed := "" ]
xsection[ short == "c807b3d9ff", hypothesis_guessed := "" ]
xsection[ short == "4233be9e00", hypothesis_guessed := "" ]
xsection[ short == "b8aa6808e6", hypothesis_guessed := "" ]
xsection[ short == "5b4fad6f3f", hypothesis_guessed := "" ]
xsection[ short == "31f770fe6a", hypothesis_guessed := "" ]
xsection[ short == "a27b5c61e7", hypothesis_guessed := "" ]
xsection[ short == "e5da8d89ef", hypothesis_guessed := "" ]
xsection[ short == "833a015ee7", hypothesis_guessed := "" ]
xsection[ short == "5a27112b81", hypothesis_guessed := "" ]
xsection[ short == "a781509efd", hypothesis_guessed := "" ]
xsection[ short == "d397efe829", hypothesis_guessed := "pille" ]
xsection[ short == "0069356382", hypothesis_guessed := "" ]
xsection[ short == "75c9e5f68a", hypothesis_guessed := "" ]
xsection[ short == "4167741e29", hypothesis_guessed := "" ]
xsection[ short == "1b6147d4d2", hypothesis_guessed := "" ]
xsection[ short == "39800782ec", hypothesis_guessed := "" ]
xsection[ short == "83c84a5860", hypothesis_guessed := "" ]
xsection[ short == "6b8a2e2171", hypothesis_guessed := "" ]
xsection[ short == "742ef3fc8f", hypothesis_guessed := "" ]
xsection[ short == "1f8cff71d7", hypothesis_guessed := "" ]
xsection[ short == "ad9c9b9040", hypothesis_guessed := "" ]
xsection[ short == "cea9927c59", hypothesis_guessed := "" ]
xsection[ short == "df09cb4538", hypothesis_guessed := "" ]
xsection[ short == "99e3b135c8", hypothesis_guessed := "" ]
xsection[ short == "910d9ca63a", hypothesis_guessed := "" ]
xsection[ short == "e7e992c12b", hypothesis_guessed := "" ]
xsection[ short == "23e08d5150", hypothesis_guessed := "" ]
xsection[ short == "942fd09099", hypothesis_guessed := "" ]
xsection[ short == "2f401ccc7c", hypothesis_guessed := "" ]
xsection[ short == "63df0811f6", hypothesis_guessed := "" ]
xsection[ short == "8d46295b6e", hypothesis_guessed := "" ]
xsection[ short == "b24ef936be", hypothesis_guessed := "" ]
xsection[ short == "a7f647e8cb", hypothesis_guessed := "" ]
xsection[ short == "f5b82733fb", hypothesis_guessed := "" ]
xsection[ short == "aa5c29c4db", hypothesis_guessed := "" ]
xsection[ short == "897feb9564", hypothesis_guessed := "" ]
xsection[ short == "03cf5578bd", hypothesis_guessed := "" ]
xsection[ short == "34e3dfaf38", hypothesis_guessed := "" ]
xsection[ short == "258a5b9029", hypothesis_guessed := "" ]
xsection[ short == "9e7394a8ce", hypothesis_guessed := "" ]
xsection[ short == "c0a44a7da5", hypothesis_guessed := "" ]
xsection[ short == "8486317f22", hypothesis_guessed := "" ]
xsection[ short == "d6cce71fb6", hypothesis_guessed := "" ]
xsection[ short == "f67c42dab9", hypothesis_guessed := "" ]
xsection[ short == "7caf85a1eb", hypothesis_guessed := "" ]
xsection[ short == "c9eb12c2b6", hypothesis_guessed := "" ]
xsection[ short == "b554aec2b1", hypothesis_guessed := "" ]
xsection[ short == "dc15d44631", hypothesis_guessed := "" ]
xsection[ short == "20804b1327", hypothesis_guessed := "zyklus" ]
xsection[ short == "94a84368ce", hypothesis_guessed := "" ]
xsection[ short == "57c5087f95", hypothesis_guessed := "" ]
xsection[ short == "ea963653a1", hypothesis_guessed := "" ]
xsection[ short == "0815462c77", hypothesis_guessed := "" ]
xsection[ short == "df3fa2f601", hypothesis_guessed := "" ]
xsection[ short == "768be5419d", hypothesis_guessed := "" ]
xsection[ short == "c17df3e87a", hypothesis_guessed := "" ]
xsection[ short == "19e8cd093e", hypothesis_guessed := "" ]
xsection[ short == "8d51dfefa0", hypothesis_guessed := "" ]
xsection[ short == "3fe6cdbe1e", hypothesis_guessed := "" ]
xsection[ short == "470e0e5037", hypothesis_guessed := "" ]
xsection[ short == "719f885747", hypothesis_guessed := "" ]
xsection[ short == "6d3d494d9c", hypothesis_guessed := "" ]
xsection[ short == "c21f988273", hypothesis_guessed := "" ]
xsection[ short == "48e45a498f", hypothesis_guessed := "" ]
xsection[ short == "c729a3bdf3", hypothesis_guessed := "" ]
xsection[ short == "5803bcd6a2", hypothesis_guessed := "" ]
xsection[ short == "e9dfe8b9fa", hypothesis_guessed := "" ]
xsection[ short == "3e42652d76", hypothesis_guessed := "" ]
xsection[ short == "100249933a", hypothesis_guessed := "" ]
xsection[ short == "7d9c91f515", hypothesis_guessed := "" ]
xsection[ short == "95ce064457", hypothesis_guessed := "" ]
xsection[ short == "abe8a003bb", hypothesis_guessed := "" ]
xsection[ short == "0eb89ad851", hypothesis_guessed := "" ]
xsection[ short == "63d9ae1c8d", hypothesis_guessed := "" ]
xsection[ short == "a34640c141", hypothesis_guessed := "pille" ]
xsection[ short == "b33dc11492", hypothesis_guessed := "" ]
xsection[ short == "ae72052f5a", hypothesis_guessed := "" ]
xsection[ short == "e9159a3bff", hypothesis_guessed := "" ]
xsection[ short == "023f1ef6ac", hypothesis_guessed := "" ]
xsection[ short == "58fa886642", hypothesis_guessed := "" ]
xsection[ short == "cc48a48f24", hypothesis_guessed := "" ]
xsection[ short == "f0e3e3814c", hypothesis_guessed := "" ]
xsection[ short == "2d89985ad2", hypothesis_guessed := "" ]
xsection[ short == "90131fc94d", hypothesis_guessed := "" ]
xsection[ short == "393959f84d", hypothesis_guessed := "" ]
xsection[ short == "2e024ac121", hypothesis_guessed := "zyklus" ]
xsection[ short == "31b9cdb8c2", hypothesis_guessed := "" ]
xsection[ short == "f57c949bc6", hypothesis_guessed := "" ]
xsection[ short == "fec84c80ed", hypothesis_guessed := "" ]
xsection[ short == "932ed913af", hypothesis_guessed := "" ]
xsection[ short == "ae95c11557", hypothesis_guessed := "" ]
xsection[ short == "eb060e5336", hypothesis_guessed := "" ]
xsection[ short == "c45e627e15", hypothesis_guessed := "" ]
xsection[ short == "453499e409", hypothesis_guessed := "" ]
xsection[ short == "90089be9f5", hypothesis_guessed := "" ]
xsection[ short == "ae58ec2dbb", hypothesis_guessed := "" ]
xsection[ short == "9c13dee6fd", hypothesis_guessed := "" ]
xsection[ short == "582e1adeb9", hypothesis_guessed := "" ]
xsection[ short == "9ee25c852d", hypothesis_guessed := "" ]
xsection[ short == "690a569b23", hypothesis_guessed := "" ]
xsection[ short == "0b8734f4a6", hypothesis_guessed := "" ]
xsection[ short == "6aa4c44925", hypothesis_guessed := "" ]
xsection[ short == "b526819f16", hypothesis_guessed := "" ]
xsection[ short == "28b79f4b6b", hypothesis_guessed := "" ]
xsection[ short == "98740e20b9", hypothesis_guessed := "" ]
xsection[ short == "80bc472a44", hypothesis_guessed := "" ]
xsection[ short == "eb28d205a3", hypothesis_guessed := "" ]
xsection[ short == "b544f1cfa6", hypothesis_guessed := "" ]
xsection[ short == "e2b21db156", hypothesis_guessed := "" ]
xsection[ short == "38921238de", hypothesis_guessed := "" ]
xsection[ short == "bc5773179f", hypothesis_guessed := "" ]
xsection[ short == "0a616bcb8b", hypothesis_guessed := "" ]
xsection[ short == "05cc6c110f", hypothesis_guessed := "" ]
xsection[ short == "ad89045d3e", hypothesis_guessed := "" ]
xsection[ short == "12424f035f", hypothesis_guessed := "" ]
xsection[ short == "cd3bae7e4d", hypothesis_guessed := "" ]
xsection[ short == "82aadd4d54", hypothesis_guessed := "" ]
xsection[ short == "74180956ce", hypothesis_guessed := "" ]
xsection[ short == "c44f074f27", hypothesis_guessed := "" ]
xsection[ short == "d69bd5e5c7", hypothesis_guessed := "" ]
xsection[ short == "84303fa3d9", hypothesis_guessed := "" ]
xsection[ short == "f5983d5a38", hypothesis_guessed := "" ]
xsection[ short == "6387229fec", hypothesis_guessed := "" ]
xsection[ short == "a8e87b8625", hypothesis_guessed := "" ]
xsection[ short == "d6682585cf", hypothesis_guessed := "" ]
xsection[ short == "9521a5fc04", hypothesis_guessed := "" ]
xsection[ short == "5d88e189c5", hypothesis_guessed := "" ]
xsection[ short == "c086dd95fe", hypothesis_guessed := "" ]
xsection[ short == "08937722e0", hypothesis_guessed := "" ]
xsection[ short == "b270e1cfcf", hypothesis_guessed := "" ]
xsection[ short == "901a6883d2", hypothesis_guessed := "" ]
xsection[ short == "f4cd3dfd2f", hypothesis_guessed := "" ]
xsection[ short == "8946460c25", hypothesis_guessed := "" ]
xsection[ short == "b62fbfb07e", hypothesis_guessed := "" ]
xsection[ short == "8e63797127", hypothesis_guessed := "" ]
xsection[ short == "52c0e1e50b", hypothesis_guessed := "" ]
xsection[ short == "7fd7916603", hypothesis_guessed := "" ]
xsection[ short == "452d2bdfa1", hypothesis_guessed := "" ]
xsection[ short == "75ccee8d17", hypothesis_guessed := "" ]
xsection[ short == "c5981e2351", hypothesis_guessed := "" ]
xsection[ short == "aefa543164", hypothesis_guessed := "" ]
xsection[ short == "5c53f68442", hypothesis_guessed := "" ]
xsection[ short == "87e886967a", hypothesis_guessed := "" ]
xsection[ short == "6c626a50be", hypothesis_guessed := "" ]
xsection[ short == "70337246f2", hypothesis_guessed := "" ]
xsection[ short == "bf2f915bb3", hypothesis_guessed := "" ]
xsection[ short == "3dc0a45a14", hypothesis_guessed := "" ]
xsection[ short == "2f65164f23", hypothesis_guessed := "" ]
xsection[ short == "7582000c79", hypothesis_guessed := "" ]
xsection[ short == "01f0227990", hypothesis_guessed := "" ]
xsection[ short == "0907998489", hypothesis_guessed := "" ]
xsection[ short == "46aacd0680", hypothesis_guessed := "" ]
xsection[ short == "21424e80c7", hypothesis_guessed := "" ]
xsection[ short == "b8905fe532", hypothesis_guessed := "" ]
xsection[ short == "327dbf66ce", hypothesis_guessed := "" ]
xsection[ short == "4b1d503bfb", hypothesis_guessed := "" ]
xsection[ short == "512bb9b013", hypothesis_guessed := "" ]
xsection[ short == "9ddeebb1b9", hypothesis_guessed := "" ]
xsection[ short == "c2ea9ba138", hypothesis_guessed := "" ]
xsection[ short == "3cee8848fb", hypothesis_guessed := "" ]
xsection[ short == "5b1a3ce2a5", hypothesis_guessed := "zyklus" ]
xsection[ short == "42726af187", hypothesis_guessed := "" ]
xsection[ short == "555c83d081", hypothesis_guessed := "" ]
xsection[ short == "42a51ece43", hypothesis_guessed := "" ]
xsection[ short == "df32b62d17", hypothesis_guessed := "" ]
xsection[ short == "85fd3a1813", hypothesis_guessed := "" ]
xsection[ short == "196c17fcda", hypothesis_guessed := "" ]
xsection[ short == "bc96d50254", hypothesis_guessed := "" ]
xsection[ short == "3f776540f0", hypothesis_guessed := "zyklus, pille" ]
xsection[ short == "daf776aeb5", hypothesis_guessed := "zyklus" ]
xsection[ short == "02539de896", hypothesis_guessed := "" ]
xsection[ short == "05033e888d", hypothesis_guessed := "" ]
xsection[ short == "0a2c53e29f", hypothesis_guessed := "" ]
xsection[ short == "2078f87efa", hypothesis_guessed := "" ]
xsection[ short == "32cab9e923", hypothesis_guessed := "" ]
xsection[ short == "5b92bf3779", hypothesis_guessed := "" ]
xsection[ short == "7158408e37", hypothesis_guessed := "" ]
xsection[ short == "8f4f6b6442", hypothesis_guessed := "" ]
xsection[ short == "98016254d4", hypothesis_guessed := "" ]
xsection[ short == "cd0351ef4a", hypothesis_guessed := "" ]
xsection[ short == "d7a118a86a", hypothesis_guessed := "" ]
xsection[ short == "eaca396225", hypothesis_guessed := "" ]
xsection[ short == "ff0fa19bd9", hypothesis_guessed := "" ]
xsection[ short == "2103c78acb", hypothesis_guessed := "" ]
xsection[ short == "d600c6e3b8", hypothesis_guessed := "" ]
xsection[ short == "6b0dbbb71b", hypothesis_guessed := "" ]
xsection[ short == "8f892266fb", hypothesis_guessed := "" ]
xsection[ short == "b2b79b104b", hypothesis_guessed := "" ]
xsection[ short == "b346650b0c", hypothesis_guessed := "" ]
xsection[ short == "f5542c8c2f", hypothesis_guessed := "" ]
xsection[ short == "fc9cbca3b3", hypothesis_guessed := "" ]
#Weitere Kodierungen von Laura (12.05.2015):
xsection[ short == "016b21f02d", hypothesis_guessed := "" ]
xsection[ short == "ffe2877aab", hypothesis_guessed := "" ]
xsection[ short == "fa8e1fd4ab", hypothesis_guessed := "" ]
xsection[ short == "f8fbb1fe55", hypothesis_guessed := "zyklus" ]
xsection[ short == "f89d76dc03", hypothesis_guessed := "" ]
xsection[ short == "f080731320", hypothesis_guessed := "" ]
xsection[ short == "f037869cdb", hypothesis_guessed := "" ]
xsection[ short == "edb19a9192", hypothesis_guessed := "" ]
xsection[ short == "ec3bcb7064", hypothesis_guessed := "" ]
xsection[ short == "ea6bd8a246", hypothesis_guessed := "zyklus" ]
xsection[ short == "e7ca4b128d", hypothesis_guessed := "" ]
xsection[ short == "e68978de78", hypothesis_guessed := "" ]
xsection[ short == "e5be3120fa", hypothesis_guessed := "zyklus" ]
xsection[ short == "e38438818f", hypothesis_guessed := "" ]
xsection[ short == "e0ae7547cd", hypothesis_guessed := "" ]
xsection[ short == "df3c5400cf", hypothesis_guessed := "" ]
xsection[ short == "d5b5462be5", hypothesis_guessed := "" ]
xsection[ short == "ca9987aa7e", hypothesis_guessed := "zyklus" ]
xsection[ short == "c8e5b1bb2a", hypothesis_guessed := "" ]
xsection[ short == "c7d8266967", hypothesis_guessed := "" ]
xsection[ short == "c7cc8e9179", hypothesis_guessed := "" ]
xsection[ short == "c7002a6061", hypothesis_guessed := "" ]
xsection[ short == "372209367e", hypothesis_guessed := "" ]
xsection[ short == "36ea7e6950", hypothesis_guessed := "" ]
xsection[ short == "339b91c8e9", hypothesis_guessed := "" ]
xsection[ short == "30b5e0684a", hypothesis_guessed := "" ]
xsection[ short == "29220ba555", hypothesis_guessed := "" ]
xsection[ short == "2629507257", hypothesis_guessed := "zyklus" ]
xsection[ short == "2433cc44b9", hypothesis_guessed := "" ]
xsection[ short == "23e07da87c", hypothesis_guessed := "" ]
xsection[ short == "1fd70a942b", hypothesis_guessed := "" ]
xsection[ short == "1e7809c3ff", hypothesis_guessed := "" ]
xsection[ short == "1be261786b", hypothesis_guessed := "" ]
xsection[ short == "16884a03a0", hypothesis_guessed := "" ]
xsection[ short == "1642233f9a", hypothesis_guessed := "" ]
xsection[ short == "0aa79cb247", hypothesis_guessed := "" ]
xsection[ short == "07958bd3a3", hypothesis_guessed := "" ]
xsection[ short == "016b21f02d", hypothesis_guessed := "" ]
xsection[ short == "c33d554719", hypothesis_guessed := "" ]
xsection[ short == "c43db6a7f3", hypothesis_guessed := "" ]
xsection[ short == "c4a69cfdd2", hypothesis_guessed := "" ]
xsection[ short == "c78e191ab8", hypothesis_guessed := "" ]
xsection[ short == "c8c1bab681", hypothesis_guessed := "zyklus" ]
xsection[ short == "ccec904e73", hypothesis_guessed := "" ]
xsection[ short == "cd2d96883b", hypothesis_guessed := "zyklus" ]
xsection[ short == "ce898dee12", hypothesis_guessed := "zyklus" ]
xsection[ short == "ce8e40de7c", hypothesis_guessed := "zyklus" ]
xsection[ short == "d278dadb38", hypothesis_guessed := "" ]
xsection[ short == "dd7e68b004", hypothesis_guessed := "hormones" ]
xsection[ short == "ee429eb56c", hypothesis_guessed := "hormones" ]
xsection[ short == "f64039d090", hypothesis_guessed := "hormones" ]
xsection[ short == "f7cba09218", hypothesis_guessed := "" ]
xsection[ short == "a5df9fb4a6", hypothesis_guessed := "" ]
xsection[ short == "a7d84388f4", hypothesis_guessed := "" ]
xsection[ short == "a80dd12d1c", hypothesis_guessed := "" ]
xsection[ short == "aa6e507b4a", hypothesis_guessed := "zyklus" ]
xsection[ short == "b03ce0a6c1", hypothesis_guessed := "" ]
xsection[ short == "b4235d3dec", hypothesis_guessed := "zyklus" ]
xsection[ short == "b438d1b8d3", hypothesis_guessed := "" ]
xsection[ short == "b96d63f82a", hypothesis_guessed := "" ]
xsection[ short == "b9c5ba5f45", hypothesis_guessed := "" ]
xsection[ short == "bb55d539b6", hypothesis_guessed := "" ]
xsection[ short == "bbd0ab2a7c", hypothesis_guessed := "" ]
xsection[ short == "bd0a14ae7c", hypothesis_guessed := "" ]
xsection[ short == "bfbd787872", hypothesis_guessed := "" ]
xsection[ short == "8c6bdbb1ad", hypothesis_guessed := "" ]
xsection[ short == "8dd1e1df18", hypothesis_guessed := "" ]
xsection[ short == "8ff7ba488f", hypothesis_guessed := "" ]
xsection[ short == "90f7b0d155", hypothesis_guessed := "zyklus" ]
xsection[ short == "933a0738fb", hypothesis_guessed := "" ]
xsection[ short == "94b1eeac56", hypothesis_guessed := "" ]
xsection[ short == "96beeed073", hypothesis_guessed := "zyklus" ]
xsection[ short == "9700064ee1", hypothesis_guessed := "" ]
xsection[ short == "974be45de9", hypothesis_guessed := "" ]
xsection[ short == "9b49e26d03", hypothesis_guessed := "" ]
xsection[ short == "a0e3e25747", hypothesis_guessed := "" ]
xsection[ short == "a1ec8fa016", hypothesis_guessed := "" ]
xsection[ short == "a3c41a797b", hypothesis_guessed := "" ]
xsection[ short == "af7a926157", hypothesis_guessed := "" ]
xsection[ short == "7d57498d7c", hypothesis_guessed := "" ]
xsection[ short == "7d9813e530", hypothesis_guessed := "" ]
xsection[ short == "7e6a0e3560", hypothesis_guessed := "" ]
xsection[ short == "7e6af239b5", hypothesis_guessed := "" ]
xsection[ short == "7f30718a87", hypothesis_guessed := "" ]
xsection[ short == "7f58be6305", hypothesis_guessed := "" ]
xsection[ short == "80e84bd511", hypothesis_guessed := "" ]
xsection[ short == "82c07d327c", hypothesis_guessed := "" ]
xsection[ short == "8346102b19", hypothesis_guessed := "" ]
xsection[ short == "85f6ed2be2", hypothesis_guessed := "zyklus" ]
xsection[ short == "8694034d6e", hypothesis_guessed := "" ]
xsection[ short == "8bb8bdea8e", hypothesis_guessed := "" ]
xsection[ short == "8bbd65530f", hypothesis_guessed := "" ]
xsection[ short == "8c118bfaaa", hypothesis_guessed := "" ]
xsection[ short == "62ad56f438", hypothesis_guessed := "" ]
xsection[ short == "62f9888d39", hypothesis_guessed := "" ]
xsection[ short == "698a496071", hypothesis_guessed := "" ]
xsection[ short == "6a2b85aade", hypothesis_guessed := "" ]
xsection[ short == "6b3264f6f7", hypothesis_guessed := "" ]
xsection[ short == "6c08680fc7", hypothesis_guessed := "" ]
xsection[ short == "6c9e8db5f5", hypothesis_guessed := "zyklus" ]
xsection[ short == "71cde9c24a", hypothesis_guessed := "" ]
xsection[ short == "7250a0eaef", hypothesis_guessed := "" ]
xsection[ short == "7347274f15", hypothesis_guessed := "zyklus" ]
xsection[ short == "761f49975d", hypothesis_guessed := "zyklus" ]
xsection[ short == "78308b0462", hypothesis_guessed := "" ]
xsection[ short == "79ffad4690", hypothesis_guessed := "" ]
xsection[ short == "7ba9d146ee", hypothesis_guessed := "" ]
xsection[ short == "495eb8a1a4", hypothesis_guessed := "" ]
xsection[ short == "4e49fb555e", hypothesis_guessed := "" ]
xsection[ short == "4ff27e7894", hypothesis_guessed := "" ]
xsection[ short == "52d2920018", hypothesis_guessed := "" ]
xsection[ short == "57e9a4653d", hypothesis_guessed := "" ]
xsection[ short == "585d957869", hypothesis_guessed := "" ]
xsection[ short == "59b7aacb65", hypothesis_guessed := "" ]
xsection[ short == "5ae3516355", hypothesis_guessed := "" ]
xsection[ short == "5b52495100", hypothesis_guessed := "hormones" ]
xsection[ short == "5b947ae89f", hypothesis_guessed := "" ]
xsection[ short == "5bf0258d2e", hypothesis_guessed := "" ]
xsection[ short == "5e834dcb1b", hypothesis_guessed := "zyklus" ]
xsection[ short == "5ee0ef1c0d", hypothesis_guessed := "" ]
xsection[ short == "62ab3b40ac", hypothesis_guessed := "" ]
xsection[ short == "038daf9412", hypothesis_guessed := "zyklus" ]
xsection[ short == "1100a0c1b8", hypothesis_guessed := "zyklus" ]
xsection[ short == "2a7563b0e2", hypothesis_guessed := "" ]
xsection[ short == "2f5c3a7f8d", hypothesis_guessed := "hormones" ]
xsection[ short == "37226a6cf2", hypothesis_guessed := "" ]
xsection[ short == "385d08eaa9", hypothesis_guessed := "zyklus" ]
xsection[ short == "3afca832d6", hypothesis_guessed := "zyklus" ]
xsection[ short == "3b989930b5", hypothesis_guessed := "" ]
xsection[ short == "3e5128a514", hypothesis_guessed := "" ]
xsection[ short == "4587224153", hypothesis_guessed := "" ]
xsection[ short == "46ebc8885a", hypothesis_guessed := "" ]
xsection[ short == "4899378885", hypothesis_guessed := "" ]
xsection[ short == "48ba97715e", hypothesis_guessed := "" ]
xsection[ short == "c0bb1efc7c", hypothesis_guessed := "" ]
xsection[ short == "2460707240", hypothesis_guessed := "" ]
xsection[ short == "41a214d646", hypothesis_guessed := "zyklus" ]
xsection[ short == "5b5892bd42", hypothesis_guessed := "" ]
xsection[ short == "7bf4ee841c", hypothesis_guessed := "" ]
xsection[ short == "8490b693cd", hypothesis_guessed := "zyklus" ]
xsection[ short == "266759ceac", hypothesis_guessed := "" ]
xsection[ short == "4230b1e4b9", hypothesis_guessed := "" ]
xsection[ short == "88f484ab02", hypothesis_guessed := "" ]
xsection[ short == "8b46361090", hypothesis_guessed := "zyklus" ]
xsection[ short == "8b6988119b", hypothesis_guessed := "" ]
xsection[ short == "e811afaaf5", hypothesis_guessed := "" ]
xsection[ short == "08100198e5", hypothesis_guessed := "" ]
xsection[ short == "2b77126cdf", hypothesis_guessed := "" ]
xsection[ short == "4b831ccd26", hypothesis_guessed := "" ]
xsection[ short == "679f9f427d", hypothesis_guessed := "" ]
xsection[ short == "86fb131347", hypothesis_guessed := "" ]
xsection[ short == "9c36978700", hypothesis_guessed := "zyklus" ]
xsection[ short == "ab247aac0e", hypothesis_guessed := "" ]
xsection[ short == "cbe6931db2", hypothesis_guessed := "" ]
xsection[ short == "df9f46844a", hypothesis_guessed := "" ]
xsection[ short == "e98b2d9feb", hypothesis_guessed := "" ]
xsection[ short == "edd8560a9c", hypothesis_guessed := "" ]
xsection[ short == "n1pbqggsCW", hypothesis_guessed := "" ]
xsection[ short == "1c8aaf32ab", hypothesis_guessed := "" ]
xsection[ short == "412e04ad03", hypothesis_guessed := "" ]
xsection[ short == "9f95eb7af7", hypothesis_guessed := "zyklus" ]
xsection[ short == "Q0-FN91tkB", hypothesis_guessed := "zyklus" ]
xsection[ short == "jlDTAYbPji", hypothesis_guessed := "" ]
xsection[ short == "F_ltXLLl8H", hypothesis_guessed := "" ]
xsection[ short == "085635cb57", hypothesis_guessed := "zyklus" ]
xsection[ short == "59858e60ea", hypothesis_guessed := "hormones" ]
xsection[ short == "1d127ea9f0", hypothesis_guessed := "zyklus" ]
xsection[ short == "ac24597894", hypothesis_guessed := "" ]
xsection[ short == "b5ada85e09", hypothesis_guessed := "" ]
xsection[ short == "99bbc3846c", hypothesis_guessed := "" ]
xsection[ short == "e2f7046075", hypothesis_guessed := "" ]
xsection[ short == "ff6f9d2943", hypothesis_guessed := "" ]
xsection[ short == "524434fcd0", hypothesis_guessed := "" ]
xsection[ short == "86e8d19764", hypothesis_guessed := "" ]
xsection[ short == "0ba321ca04", hypothesis_guessed := "" ]
xsection[ short == "51385268f0", hypothesis_guessed := "" ]
xsection[ short == "667e0b755a", hypothesis_guessed := "" ]
xsection[ short == "ae9c69937a", hypothesis_guessed := "zyklus" ]
xsection[ short == "b99aafa45a", hypothesis_guessed := "" ]
xsection[ short == "dz_hzRfdL_", hypothesis_guessed := "" ]
# stand: 21.07.2015.
# we were asked to delete the last six diary days for this person (faked data after a break up)
remove = tail(tagebuch[str_sub(session,1,10) == "73a5c0c50d", session_id], 6)
tagebuch = tagebuch[! session_id %in% remove, ]
cat(xsection[ ! is.na(meaning_study) & is.na(hypothesis_guessed), str_c( str_sub(session, 1, 10),"\n", meaning_study,"\n\n") ]) # zeigt alle die was geschrieben haben, aber nur die bei denen wir die Hypothese noch nicht kodiert haben
xsection[is.na(hypothesis_guessed),]$hypothesis_guessed = ""
tagebuch$dodgy_data = FALSE
tagebuch$time_of_response = hour(tagebuch$created) + minute(tagebuch$created)/60 + second(tagebuch$created) / 60/60
qplot(tagebuch$time_of_response)
table(tagebuch$time_of_response < 15 & tagebuch$time_of_response > 1)
##
## FALSE TRUE
## 30705 219
tagebuch$dodgy_data = if_else(tagebuch$time_of_response < 15 & tagebuch$time_of_response > 1, T, tagebuch$dodgy_data, F)
tagebuch$time_of_response[which(tagebuch$time_of_response < 15 & tagebuch$time_of_response > 1)] = 1 # access time abberrations
tagebuch$time_of_response[which(tagebuch$time_of_response < 15)] =
tagebuch$time_of_response[which(tagebuch$time_of_response < 15)] + 24
qplot(tagebuch$time_of_response, binwidth = 5/60)
tagebuch$time_for_response = as.numeric(tagebuch$ended - tagebuch$created, units = "mins")
table(tagebuch$time_for_response > 60*24)
##
## FALSE TRUE
## 30548 376
tagebuch$dodgy_data = if_else(tagebuch$time_for_response > 60*24, T, tagebuch$dodgy_data)
table(tagebuch$time_for_response < 1)
##
## FALSE TRUE
## 30894 30
tagebuch$dodgy_data = if_else(tagebuch$time_for_response < 1, T, tagebuch$dodgy_data)
table(tagebuch$time_for_response > 60*24)
##
## FALSE TRUE
## 30548 376
table(tagebuch$time_for_response > 60*7)
##
## FALSE TRUE
## 29736 1188
tagebuch$time_for_response[tagebuch$time_for_response > 60 * 7] = NA
qplot(tagebuch$time_for_response) + scale_x_log10()
qplot(tagebuch$time_for_response) + xlim(0,10)
table(tagebuch$time_for_response < 1)
##
## FALSE TRUE
## 29706 30
sd(tagebuch$time_for_response[tagebuch$time_for_response<20], na.rm = T)
## [1] 2.695
mean(tagebuch$time_for_response[tagebuch$time_for_response<20], na.rm = T)
## [1] 4.306
tagebuch$time_for_response[tagebuch$time_for_response>100] = NA
tagebuch = tagebuch %>% group_by(session) %>% arrange(session,created, ended) %>%
mutate(time_since_last_response = as.numeric(created - lag(ended) , units = "hours"))
qplot(tagebuch$time_since_last_response) + scale_x_log10()
table(round(tagebuch$time_since_last_response)) %>% head(24)
##
## -121 -18 -15 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 1 1 1 73 8 8 28 23 36 40 44 65 68 52 34 39 39 25 34 28 117 419
## 19 20
## 830 1047
table(tagebuch$time_since_last_response < 17)
##
## FALSE TRUE
## 29071 677
tagebuch = tagebuch %>%
mutate(
dodgy_data = if_else(time_since_last_response < 17, T, dodgy_data, dodgy_data),
# timing mishaps?
time_since_last_response = if_else(time_since_last_response < 17, NA_real_,
# skipped days
if_else(time_since_last_response > 31, NA_real_, time_since_last_response)))
qplot(tagebuch$time_since_last_response)
tagebuch = tagebuch %>% ungroup() %>% tbl_df()
Check for days on which probands entered nonsensical data.
likert_scales = tagebuch %>% select(session, created, choice_of_clothing_1, choice_of_clothing_2, choice_of_clothing_3, choice_of_clothing_4, choice_of_clothing_5, choice_of_clothing_6, choice_of_clothing_7, choice_of_clothing_8, self_esteem_1, desirability_1, desirability_partner, NARQ_admiration_1, NARQ_admiration_2, NARQ_admiration_3, NARQ_rivalry_1, NARQ_rivalry_2, NARQ_rivalry_3, jealousy_1, male_jealousy_1, male_jealousy_2, male_jealousy_3, male_mate_retention_1, male_mate_retention_2, mate_retention_3, mate_retention_4, mate_retention_5, mate_retention_6, attention_1, attention_2, male_attention_1, extra_pair_2, extra_pair_3, extra_pair_4, extra_pair_5, extra_pair_6, extra_pair_7, extra_pair_8, extra_pair_9, extra_pair_10, extra_pair_11, extra_pair_12, extra_pair_13)
likert_scales = likert_scales %>% gather(variable, value, -session, -created) %>% group_by(session,created)
# all 6-point scales
crosstabs(~ variable + value, data = likert_scales)
## value
## variable 1 2 3 4 5 6 <NA>
## attention_1 2950 1413 3069 7681 7269 7492 1050
## attention_2 7785 3194 4982 5502 4166 4245 1050
## choice_of_clothing_1 6472 3381 5966 7768 4545 1764 1028
## choice_of_clothing_2 628 1046 3185 9221 8696 7116 1032
## choice_of_clothing_3 2782 2963 5880 8896 6298 3068 1037
## choice_of_clothing_4 5639 4092 7002 8183 3918 1050 1040
## choice_of_clothing_5 14708 5023 5030 3347 1400 377 1039
## choice_of_clothing_6 3567 2175 4804 10377 6510 2450 1041
## choice_of_clothing_7 10425 4983 7017 4902 2004 550 1043
## choice_of_clothing_8 8689 4507 7087 5938 2720 941 1042
## desirability_1 3063 2944 6093 9126 5787 2868 1043
## desirability_partner 3019 2413 5435 9046 6032 3934 1045
## extra_pair_10 24168 1382 1145 1629 781 768 1051
## extra_pair_11 26789 1080 728 773 299 204 1051
## extra_pair_12 16870 2130 2522 4655 2281 1415 1051
## extra_pair_13 24682 1332 930 1396 663 870 1051
## extra_pair_2 12039 3570 4078 5888 2883 1416 1050
## extra_pair_3 19277 2631 2303 2754 1518 1390 1051
## extra_pair_4 26262 1237 763 825 355 431 1051
## extra_pair_5 21919 991 877 1804 1429 2853 1051
## extra_pair_6 18929 1310 1403 2853 2036 3342 1051
## extra_pair_7 22819 2032 1579 1865 875 703 1051
## extra_pair_8 26492 1258 880 766 277 200 1051
## extra_pair_9 25338 1245 995 1262 522 511 1051
## jealousy_1 16788 1826 2111 3706 2333 3113 1047
## male_attention_1 7147 2601 3986 5240 4890 6010 1050
## male_jealousy_1 24376 2162 1432 1209 429 268 1048
## male_jealousy_2 24805 1817 1312 1176 433 334 1047
## male_jealousy_3 27378 1128 600 403 191 176 1048
## male_mate_retention_1 17532 1787 2043 3564 2062 2888 1048
## male_mate_retention_2 7590 1225 2136 3458 3508 11959 1048
## mate_retention_3 10603 2841 4843 5519 3674 2396 1048
## mate_retention_4 7577 1239 2205 3522 3550 11783 1048
## mate_retention_5 22085 2475 1817 2071 824 604 1048
## mate_retention_6 9646 3944 5689 6026 2967 1603 1049
## NARQ_admiration_1 10545 4120 6077 6020 2289 826 1047
## NARQ_admiration_2 10803 3991 5827 6000 2397 859 1047
## NARQ_admiration_3 11050 3939 5601 5852 2510 925 1047
## NARQ_rivalry_1 24605 2457 1357 960 276 221 1048
## NARQ_rivalry_2 25617 1753 960 894 321 332 1047
## NARQ_rivalry_3 26119 1829 1012 611 202 103 1048
## self_esteem_1 1115 1290 3612 9965 9971 3928 1043
no_variation = likert_scales %>% summarise(no_variation = sd(value,na.rm = T) == 0)
table(no_variation$no_variation)
##
## FALSE TRUE
## 29868 23
tagebuch = tagebuch %>% left_join(no_variation, by = c("session", "created")) %>%
mutate(dodgy_data = if_else(no_variation, TRUE, dodgy_data, dodgy_data))
table(tagebuch$dodgy_data, exclude = NULL)
##
## FALSE TRUE
## 29660 1264
tagebuch = tagebuch %>% tbl_dt()
On April 25, 2015, we changed an item prior to the evening diary. The item relationship_satisfaction_1 had a mix-up in the response options 4 and 5, so that “mostly satisfied” came before “rather satisfied”. Hence, we cannot interpret 4 and 5 very well prior to 4/25 because we can’t know whether subjects respected the order or the phrasing. Hence, we decided to assign both 4s and 5s a 4.5 prior to April 25.
We did the same thing for the item sexual_intercourse_4 on May 29.
library(car)
table(tagebuch$relationship_satisfaction_1)
##
## 1 2 3 4 5 6
## 707 880 2189 5569 9915 10642
tagebuch[, relationship_satisfaction_1 := as.numeric(relationship_satisfaction_1)]
tagebuch[created < as.POSIXct('2014-04-25') , relationship_satisfaction_1 := Recode(relationship_satisfaction_1, "4=4.5;5=4.5")]
table(tagebuch$relationship_satisfaction_1)
##
## 1 2 3 4 4.5 5 6
## 707 880 2189 5036 1440 9008 10642
table(tagebuch$sexual_intercourse_4)
##
## 1 2 3 4 5 6
## 46 78 250 1060 2188 2889
tagebuch[, sexual_intercourse_4 := as.numeric(sexual_intercourse_4)]
tagebuch[created < as.POSIXct('2014-04-29') , sexual_intercourse_4 := Recode(sexual_intercourse_4, "4=4.5;5=4.5")]
table(tagebuch$sexual_intercourse_4)
##
## 1 2 3 4 4.5 5 6
## 46 78 250 916 445 1887 2889
xsection$SOI_be = rowMeans(xsection[, list(SOI_R_1, SOI_R_2, SOI_R_3)])
xsection$SOI_im = rowMeans(xsection[, list(SOI_R_7, SOI_R_8, SOI_R_9)])
xsection$SOI_uv = rowMeans(xsection[, list(SOI_R_4, SOI_R_5, SOI_R_6)])
# this woman told us she reported masturbation as "intimacy with someone other than my partner"
tagebuch[session %begins_with% "ce8e40de7c", ]$extra_pair_1 = NA
tagebuch[session %begins_with% "ce8e40de7c", ]$extra_pair_1b = NA
xsection$duration_relationship_total <- xsection$duration_relationship_months + xsection$duration_relationship_years*12 # create new variable duration_relationship_total
xsection[, hormonal_contraception := ifelse( contraception %contains% "contraceptive_pill" | contraception %contains% "other_hormonal", 1, 0) ]
qplot(factor(xsection$hormonal_contraception))
xsection[, pill_contraception := ifelse( contraception %contains% "contraceptive_pill", 1, 0) ]
qplot(factor(xsection$pill_contraception))
# library(xlsx)
# pillen2 = read.xlsx( "item_tables/Vorab_Fragebogen1-v3.xlsx", sheetName = "Choices")
pillen = readxl::read_excel( "item_tables/Vorab_Fragebogen1-v3.xlsx",sheet = "Choices") %>% data.frame()
pillen = as.data.table(pillen[ 1:97, ])
pillen[, gestagen_ug_merged := as.numeric(as.character( str_replace_all(
str_replace_all( str_replace_all( Gestagen.Milligramm.pro.Pille, '500', ',500'), '000', '')
,pattern = ',','.') )) * 1000]
pillen[, gestagen_cycle := as.numeric(as.character( str_replace_all( Gestagen.Milligramm.pro.Zyklus , pattern = ',','.')))]
pillen[name == "yasmin", ]
crosstabs(pillen$gestagen_ug_merged)
## pillen$gestagen_ug_merged
## 30 60 75 100 125 150 250 2000 2500 3000 <NA>
## 2 1 7 10 3 14 1 31 1 8 19
pillen[, estrogen_ug_cycle := as.numeric(as.character( Östrogenmikrogramm.pro.Zyklus )) ]
pillen[, gestagen_type := as.character(Art.des.Gestagens) ]
crosstabs(pillen$gestagen_type)
## pillen$gestagen_type
## CMA CPA DNG DSG DSP GSD LNG NES NGT NOMAC <NA>
## 16 10 8 18 8 3 26 4 2 1 1
pillen[, pills := as.character(name)]
pillen[is.na(estrogen_ug_cycle), list(pills, Östrogenmikrogramm.pro.Zyklus) ]
pillen[is.na(gestagen_ug_merged), list(pills, Gestagen.Milligramm.pro.Pille) ]
pillen = pillen[ pills != 'other', ]
pillen[is.na(Östrogenmikrogramm.pro.Zyklus), estrogen_ug_cycle := 0 ]
pillen[is.na(Östrogenmikrogramm.pro.Pille), estrogen_ug_merged := 0 ]
qplot(pillen$estrogen_ug_cycle)
table(xsection$pills == "qlaira")
##
## FALSE TRUE
## 606 4
table(xsection$pills == "zoely")
##
## FALSE TRUE
## 602 8
xsection = merge(xsection, pillen[,list(pills, estrogen_ug_merged, estrogen_ug_cycle
, gestagen_type,gestagen_ug_merged, gestagen_cycle
)], by = "pills", all.x = T)
table(xsection$pills == "other")
##
## FALSE TRUE
## 556 54
crosstabs(pillen[, estrogen_ug_cycle/estrogen_ug_merged])
## pillen[, estrogen_ug_cycle/estrogen_ug_merged]
## NaN <NA>
## 9 87
## Estrogen Levels for all people (also for those who didnt take the pill)
crosstabs(xsection$estrogen_ug_cycle)
## xsection$estrogen_ug_cycle
## 0 360 420 480 630 665 680 730 735 1150 <NA>
## 26 1 141 11 328 3 4 2 26 2 664
xsection[, estrogen_ug_all := ifelse(hormonal_contraception, estrogen_ug_cycle, 0)]
crosstabs(xsection$estrogen_ug_all)
## xsection$estrogen_ug_all
## 0 360 420 480 630 665 680 730 735 1150 <NA>
## 540 1 141 11 328 3 4 2 26 2 150
# Forming Categories
xsection[, estrogen_categories := cut(estrogen_ug_all, breaks = c(0, 300, 600, 1200), include.lowest = T)]
crosstabs(xsection$estrogen_categories)
## xsection$estrogen_categories
## [0,300] (300,600] (600,1.2e+03]
## 540 153 365
cat(xsection[ pills=='other' & is.na(estrogen_ug_cycle), str_c( str_sub(session, 1, 10),"\n", other_pill_name,"\n\n") ])
## 033d3e1d21
## yris mite
##
## 0643a31c42
## swingo
##
## 0d9a341ceb
## desofemine 20 Nova
##
## 1528e9536b
## dienovel
##
## 1c93be41a1
## Carlin
##
## 1f9fc2cc79
## Kleodina
##
## 21054c807c
## Maitalon
##
## 2433cc44b9
## ludéal glé
##
## 24af1309b4
## Estinette
##
## 29220ba555
## Swingo
##
## 319e2ce5cb
## Midane
##
## 354137dd7f
## Sibilla
##
## 37226a6cf2
## Asumate 30
##
## 3d0adfe3a2
## Aristelle
##
## 4167741e29
## yara hexal
##
## 4202863d09
## Cedia
##
## 47b6e59dfa
## Layaisa
##
## 4aafa757ef
## Eliza
##
## 511189d8a0
## Selina
##
## 59858e60ea
## Solera
##
## 5b4fad6f3f
## visanne
##
## 62f9888d39
## Swingo 30
##
## 640a7b047b
## aristelle
##
## 646c651ea4
## Leanova AL
##
## 6da65c4e45
## Dienovel
##
## 82aadd4d54
## Dienovel
##
## 83c84a5860
## Selina
##
## 86fb131347
## Aristelle
##
## 8bd7c4624b
## Dienovel
##
## 8c118bfaaa
## Swingo
##
## 8f892266fb
## Yvette
##
## 9336f57640
## Midane
##
## 9700064ee1
## Dienovel
##
## 9b49e26d03
## Minette
##
## 9c89af2b7f
## Microgynon
##
## 9f61438d5f
## Violette
##
## aa38292783
## minesse
##
## aebdd368b4
## Swingo 30
##
## b726d9cd36
## Swingo
##
## c09518da85
## Lysandra beta
##
## c33d554719
## Dienovel
##
## c6dd42db4b
## desofemono
##
## c8ca0944d9
## Velafee
##
## cf37f837bc
## Dienogenance
##
## d30783c574
## Mirelle
##
## d5b5462be5
## Solera
##
## e0eeb02960
## LISA - Studienpille
##
## e2f049ef95
## mercilon
##
## e68978de78
## Desogestrel aristo
##
## e7e992c12b
## Desogestrel atisto
##
## edb19a9192
## Solgest
##
## f42ae3caec
## sibilla
##
## f64039d090
## Swingo 30
##
## f9454c5d18
## Dienovel
xsection[ short == "0643a31c42", estrogen_ug := 20 ]
xsection[ short == "0643a31c42", gestagens_ug := 100]
xsection[ short == "0643a31c42", gestagens := "LNG"]
xsection[ short == "1528e9536b", estrogen_ug := 30 ]
xsection[ short == "1528e9536b", gestagens_ug := 2000]
xsection[ short == "1528e9536b", gestagens := "DNG"]
xsection[ short == "21054c807c", estrogen_ug := 20 ]
xsection[ short == "21054c807c", gestagens_ug := 3000]
xsection[ short == "21054c807c", gestagens := "DSP"]
#one person swapped their values
xsection[ short == "24af1309b4", estrogen_ug := 20 ]
xsection[ short == "24af1309b4", gestagens_ug := 75]
xsection[ short == "24af1309b4", gestagens := "GSD"]
xsection[ short == "4167741e29", estrogen_ug := 20 ]
xsection[ short == "4167741e29", gestagens_ug := 3000]
xsection[ short == "4167741e29", gestagens := "DSP"]
# visanne is used as a contraceptive only in combination with something containing estrogen, but we don't know what was the case here
xsection[ short == "5b4fad6f3f", estrogen_ug := NA]
xsection[ short == "5b4fad6f3f", gestagens_ug := NA]
xsection[ short == "5b4fad6f3f", gestagens := NA_character_]
xsection[ short == "71f06ef90f", estrogen_ug := 30 ]
xsection[ short == "71f06ef90f", gestagens_ug := 300]
xsection[ short == "71f06ef90f", gestagens := "DNG"]
xsection[ short == "82aadd4d54", estrogen_ug := 30 ]
xsection[ short == "82aadd4d54", gestagens_ug := 2000]
xsection[ short == "82aadd4d54", gestagens := "DNG"]
xsection[ short == "83c84a5860", estrogen_ug := 30 ]
xsection[ short == "83c84a5860", gestagens_ug := 150]
xsection[ short == "83c84a5860", gestagens := "LNG"]
xsection[ short == "8bd7c4624b", estrogen_ug := 30 ]
xsection[ short == "8bd7c4624b", gestagens_ug := 2000]
xsection[ short == "8bd7c4624b", gestagens := "DNG"]
xsection[ short == "9b49e26d03", estrogen_ug := 30 ]
xsection[ short == "9b49e26d03", gestagens_ug := 2000]
xsection[ short == "9b49e26d03", gestagens := "CMA"]
xsection[ short == "9f61438d5f", estrogen_ug := 30 ]
xsection[ short == "9f61438d5f", gestagens_ug := 2000]
xsection[ short == "9f61438d5f", gestagens := "DNG"]
xsection[ short == "b726d9cd36", estrogen_ug := 20 ]
xsection[ short == "b726d9cd36", gestagens_ug := 100]
xsection[ short == "b726d9cd36", gestagens := "LNG"]
xsection[ short == "c09518da85", estrogen_ug := 35 ]
xsection[ short == "c09518da85", gestagen_ug := 250]
xsection[ short == "c09518da85", gestagens := "NGT"]
xsection[ short == "c6dd42db4b", estrogen_ug := 0 ]
xsection[ short == "c6dd42db4b", gestagens_ug := 75]
xsection[ short == "c6dd42db4b", gestagens := "DSG"]
xsection[ short == "e2f049ef95", estrogen_ug := 20 ]
xsection[ short == "e2f049ef95", gestagens_ug := 60]
xsection[ short == "e2f049ef95", gestagens := "DSG"]
xsection[ short == "59858e60ea", estrogen_ug := 20 ]
xsection[ short == "59858e60ea", gestagens_ug := 60]
xsection[ short == "59858e60ea", gestagens := "DSG"]
xsection[ pills == "other", list(other_pill_name, estrogen_ug,estrogen_ug_cycle, gestagens, gestagens_ug)]
## could also use info on gestagens etc
xsection[ pills == "other", estrogen_ug_merged := estrogen_ug] # per pill
xsection[ pills == "other", estrogen_ug_cycle := 21 * estrogen_ug] # times 21
xsection[ pills == "other", gestagen_type := gestagens]
xsection[ pills == "other", gestagen_ug_merged := gestagens_ug]
crosstabs(~is.na(xsection$pills) + is.na(xsection$estrogen_ug_cycle)) # except qlaira and zoely
## is.na(xsection$estrogen_ug_cycle)
## is.na(xsection$pills) FALSE TRUE
## FALSE 597 13
## TRUE 0 598
qplot(xsection$estrogen_ug_cycle)
# qplot(xsection$gestagen_ug_merged)
# xsection[ gestagen_ug_merged < 100, ]
# xsection[gestagen_ug_merged <= estrogen_ug_merged, list(pills, other_pill_name, gestagen_ug_merged, estrogen_ug_merged) ]
qplot(data=xsection, estrogen_ug_cycle, estrogen_ug_merged, geom = 'jitter')
# library(xlsx)
other_hormonal = readxl::read_excel( "item_tables/Nachbefragung_other_hormonal.xlsx", sheet = "Choices") %>% data.frame()
other_hormonal = as.data.table(other_hormonal)
other_hormonal[, hormonal_2 := as.character( name)]
xsection = merge(xsection, other_hormonal[,list(hormonal_2, estrogen_ug_other)], by = "hormonal_2", all.x = T)
nrow(xsection[!is.na(xsection$estrogen_ug_other),])
## [1] 39
table(xsection$estrogen_ug_other)
##
## 0 315 420
## 12 26 1
xsection[!is.na(estrogen_ug_other), estrogen_ug_cycle := estrogen_ug_other]
qplot(data = xsection, estrogen_ug_merged, estrogen_ug_cycle)
qplot(data = xsection, estrogen_ug_cycle, weight) + geom_smooth() # no association
qplot(data = xsection, estrogen_ug_cycle / weight)
table(xsection$weight)
##
## 20 21 22 28 29 30 32 35 37 41 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
## 4 3 1 1 2 1 2 1 2 2 4 3 6 6 13 11 19 38 21 38 42 37 73 36 42 73 45
## 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
## 82 33 45 45 36 61 39 25 34 21 45 12 15 10 10 29 7 7 9 6 25 3 11 5 5 6 2
## 87 88 89 90 91 92 93 95 97 99 100 101 105 107 110 111 112 113 114 115 120 130 134 143 166
## 1 4 2 5 1 1 1 5 2 3 5 1 3 2 1 2 1 1 1 2 6 1 1 1 1
xsection[, estrogen_ug_per_kg := estrogen_ug_cycle / weight]
xsection[ estrogen_ug_per_kg > 20, list(estrogen_ug_cycle, weight, height)]
crosstabs(xsection$estrogen_ug_cycle)
## xsection$estrogen_ug_cycle
## 0 315 360 420 480 630 665 680 730 735 1150 <NA>
## 44 28 1 156 11 356 3 4 2 29 2 572
# create NARQ_K_total scale
tagebuch$NARQ_K_total = rowMeans(tagebuch[, list(NARQ_admiration_1, NARQ_admiration_2, NARQ_admiration_3, NARQ_rivalry_1, NARQ_rivalry_2, NARQ_rivalry_3 ) ])
###create extra-pair subscales
#extra-pair desire/fantasies; extra_pair_7, extra_pair_10, extra_pair_11, extra_pair_12, extra_pair_13
tagebuch$extra_pair_desire = rowMeans(tagebuch[, list(extra_pair_7, extra_pair_10, extra_pair_11, extra_pair_12, extra_pair_13)])
tagebuch$extra_pair_sexual_fantasies = tagebuch$extra_pair_13
#extra-pair flirting; extra_pair_4, extra_pair_8, extra_pair_9
tagebuch$extra_pair_flirting = rowMeans(tagebuch[, list(extra_pair_4, extra_pair_8, extra_pair_9)])
#extra-pair compliments; extra_pair_2, extra_pair_3
tagebuch$extra_pair_compliments = rowMeans(tagebuch[, list(extra_pair_2, extra_pair_3)])
#extra-pair going out; extra_pair_5, extra_pair_6
tagebuch$extra_pair_going_out = rowMeans(tagebuch[, list(extra_pair_5, extra_pair_6)])
tagebuch$extra_pair = rowMeans( subset(tagebuch, select = extra_pair_2:extra_pair_13) )
tagebuch %>%
filter(!is.na(created)) %>%
arrange(session, created) %>%
rename(sexual_intercourse_satisfaction = sexual_intercourse_4) %>%
mutate(
short_session = str_sub(session, 1, 6),
created_date = as.Date(created - dhours(9)),
partner_initiated_sexual_intercourse = ifelse(sexual_intercourse_3 == 2, 1, 0),
had_sexual_intercourse = ifelse(sexual_intercourse_2 == 3, 0, 1),
had_petting = ifelse(sexual_intercourse_5 == 3, 0, 1),
extra_pair_intimacy = ifelse(extra_pair_1 == 1, 1, 0),
extra_pair_sex = ifelse(is.na(extra_pair_1b) | extra_pair_1b != 1, 0, 1)
) -> tagebuch
tagebuch %>% select(matches("choice_of_clothing_(4|6|7)")) %>% alpha()
##
## Reliability analysis
## Call: psych::alpha(x = data.frame(x))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.84 0.84 0.79 0.64 5.3 0.0016 3.1 1.2 0.62
##
## lower alpha upper 95% confidence boundaries
## 0.84 0.84 0.85
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## choice_of_clothing_4 0.72 0.72 0.56 0.56 2.5 0.0032 NA 0.56
## choice_of_clothing_6 0.85 0.85 0.74 0.74 5.7 0.0017 NA 0.74
## choice_of_clothing_7 0.77 0.77 0.62 0.62 3.3 0.0026 NA 0.62
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## choice_of_clothing_4 29884 0.90 0.90 0.85 0.77 3.1 1.4
## choice_of_clothing_6 29883 0.84 0.83 0.68 0.63 3.7 1.4
## choice_of_clothing_7 29881 0.88 0.88 0.80 0.72 2.5 1.4
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 miss
## choice_of_clothing_4 0.19 0.14 0.23 0.27 0.13 0.04 0.03
## choice_of_clothing_6 0.12 0.07 0.16 0.35 0.22 0.08 0.03
## choice_of_clothing_7 0.35 0.17 0.23 0.16 0.07 0.02 0.03
tagebuch$sexy_clothes = rowMeans(tagebuch %>% select(matches("choice_of_clothing_(4|6|7)")) , na.rm = T)
tagebuch$showy_clothes = tagebuch$choice_of_clothing_8
tagebuch$sexual_intercourse_1_6scale = tagebuch$sexual_intercourse_1/5*6
tagebuch %>% select(sexual_intercourse_1_6scale, desirability_partner, attention_2) %>% alpha()
##
## Reliability analysis
## Call: psych::alpha(x = data.frame(x))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.86 0.87 0.82 0.69 6.6 0.0013 3.6 1.4 0.69
##
## lower alpha upper 95% confidence boundaries
## 0.86 0.86 0.87
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## sexual_intercourse_1_6scale 0.81 0.82 0.69 0.69 4.5 0.0021 NA 0.69
## desirability_partner 0.79 0.80 0.66 0.66 3.9 0.0023 NA 0.66
## attention_2 0.83 0.83 0.71 0.71 4.8 0.0020 NA 0.71
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## sexual_intercourse_1_6scale 29902 0.88 0.89 0.80 0.74 3.7 1.5
## desirability_partner 29879 0.89 0.90 0.82 0.77 3.8 1.5
## attention_2 29874 0.90 0.88 0.79 0.73 3.3 1.8
tagebuch$in_pair_desire = rowMeans(tagebuch %>% select(sexual_intercourse_1_6scale, desirability_partner, attention_2))
tagebuch %>% select(male_jealousy_2, male_mate_retention_1, male_mate_retention_2, male_attention_1) %>% alpha()
##
## Reliability analysis
## Call: psych::alpha(x = data.frame(x))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.49 0.48 0.44 0.18 0.91 0.0044 2.8 1.1 0.16
##
## lower alpha upper 95% confidence boundaries
## 0.48 0.49 0.5
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## male_jealousy_2 0.51 0.50 0.43 0.25 1.00 0.0048 0.0257 0.19
## male_mate_retention_1 0.47 0.43 0.38 0.20 0.74 0.0044 0.0428 0.12
## male_mate_retention_2 0.31 0.35 0.26 0.15 0.53 0.0064 0.0019 0.13
## male_attention_1 0.32 0.33 0.26 0.14 0.50 0.0062 0.0074 0.19
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## male_jealousy_2 29877 0.38 0.55 0.25 0.16 1.4 0.98
## male_mate_retention_1 29876 0.60 0.61 0.36 0.23 2.3 1.79
## male_mate_retention_2 29876 0.75 0.67 0.52 0.38 4.0 2.06
## male_attention_1 29874 0.72 0.67 0.53 0.38 3.5 1.84
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 miss
## male_jealousy_2 0.83 0.06 0.04 0.04 0.01 0.01 0.03
## male_mate_retention_1 0.59 0.06 0.07 0.12 0.07 0.10 0.03
## male_mate_retention_2 0.25 0.04 0.07 0.12 0.12 0.40 0.03
## male_attention_1 0.24 0.09 0.13 0.18 0.16 0.20 0.03
tagebuch$partner_mate_retention = rowMeans(tagebuch %>% select(male_jealousy_2, male_mate_retention_1, male_mate_retention_2, male_attention_1))
tagebuch %>% select(mate_retention_3, mate_retention_4, mate_retention_5, mate_retention_6, attention_1) %>% alpha()
##
## Reliability analysis
## Call: psych::alpha(x = data.frame(x))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.71 0.71 0.69 0.33 2.4 0.0024 3.1 1.1 0.37
##
## lower alpha upper 95% confidence boundaries
## 0.71 0.71 0.72
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## mate_retention_3 0.66 0.65 0.62 0.31 1.8 0.0030 0.0317 0.28
## mate_retention_4 0.65 0.64 0.59 0.30 1.7 0.0031 0.0208 0.29
## mate_retention_5 0.75 0.75 0.70 0.43 3.0 0.0023 0.0057 0.41
## mate_retention_6 0.65 0.64 0.61 0.31 1.8 0.0030 0.0311 0.28
## attention_1 0.61 0.60 0.55 0.28 1.5 0.0034 0.0118 0.28
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## mate_retention_3 29876 0.71 0.70 0.58 0.50 2.9 1.7
## mate_retention_4 29876 0.77 0.72 0.64 0.53 4.0 2.1
## mate_retention_5 29876 0.42 0.49 0.26 0.22 1.6 1.2
## mate_retention_6 29875 0.70 0.71 0.61 0.51 2.8 1.6
## attention_1 29874 0.78 0.77 0.73 0.62 4.3 1.5
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 miss
## mate_retention_3 0.35 0.10 0.16 0.18 0.12 0.08 0.03
## mate_retention_4 0.25 0.04 0.07 0.12 0.12 0.39 0.03
## mate_retention_5 0.74 0.08 0.06 0.07 0.03 0.02 0.03
## mate_retention_6 0.32 0.13 0.19 0.20 0.10 0.05 0.03
## attention_1 0.10 0.05 0.10 0.26 0.24 0.25 0.03
tagebuch$female_mate_retention = rowMeans(tagebuch %>% select(mate_retention_3, mate_retention_4, mate_retention_5, mate_retention_6, attention_1), na.rm = T)
cor.test(tagebuch$female_mate_retention, tagebuch$in_pair_desire)
##
## Pearson's product-moment correlation
##
## data: tagebuch$female_mate_retention and tagebuch$in_pair_desire
## t = 140, df = 30000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6226 0.6363
## sample estimates:
## cor
## 0.6295
tagebuch %>% select(jealousy_1, male_jealousy_1, male_jealousy_3) %>% alpha()
##
## Reliability analysis
## Call: psych::alpha(x = data.frame(x))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.35 0.46 0.37 0.22 0.85 0.0055 1.7 0.83 0.18
##
## lower alpha upper 95% confidence boundaries
## 0.34 0.35 0.36
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## jealousy_1 0.48 0.50 0.33 0.33 1.00 0.0055 NA 0.33
## male_jealousy_1 0.18 0.26 0.15 0.15 0.35 0.0061 NA 0.15
## male_jealousy_3 0.26 0.30 0.18 0.18 0.44 0.0069 NA 0.18
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## jealousy_1 29877 0.85 0.64 0.29 0.20 2.4 1.83
## male_jealousy_1 29876 0.61 0.73 0.50 0.27 1.4 0.96
## male_jealousy_3 29876 0.51 0.71 0.48 0.27 1.2 0.68
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 miss
## jealousy_1 0.56 0.06 0.07 0.12 0.08 0.10 0.03
## male_jealousy_1 0.82 0.07 0.05 0.04 0.01 0.01 0.03
## male_jealousy_3 0.92 0.04 0.02 0.01 0.01 0.01 0.03
tagebuch$female_jealousy = rowMeans(tagebuch %>% select(jealousy_1, male_jealousy_1, male_jealousy_3), na.rm = T)
tagebuch %>% select(in_pair_desire, female_jealousy, female_mate_retention, partner_mate_retention) %>% cor(use='na.or.complete') %>% round(2)
## in_pair_desire female_jealousy female_mate_retention partner_mate_retention
## in_pair_desire 1.00 0.15 0.63 0.57
## female_jealousy 0.15 1.00 0.22 0.46
## female_mate_retention 0.63 0.22 1.00 0.70
## partner_mate_retention 0.57 0.46 0.70 1.00
xsection$has_not_had_sex_yet = ifelse(xsection$first_time == 999, 1, 0)
xsection$first_time = ifelse(xsection$first_time == 999 | xsection$first_time < 8, NA_real_, xsection$first_time)
xsection[menarche == 1, menarche := NA_real_] # unlikely
xsection[age < first_time, has_not_had_sex_yet := 1] # one person, presumably entered this info before we told people how to indicate that they have not had sex
xsection[age < first_time, first_time := NA_real_]
xsection$trying_to_get_pregnant = ifelse(xsection$wish_for_child == 2, "no", "yes")
xsection$had_sex_with_partner_yet = ifelse(xsection$sexual_intercourse_partner == 2, "no", "yes")
###Tätigkeit_Cleaning
# Die Gruppen "Auszubildende" hat ein Leerzeichen in der Gruppenbezeichnung
xsection[occupation == "Auszubildende ", occupation := "Auszubildende" ]
xsection[ , occupation_clean := occupation]
xsection[ occupation %in% c('Angestellte','Beamtin','berufstätig'), occupation_clean := 'Berufstätig']
xsection[ occupation %in% c('Student (Chemie)','Examenskandidatin/Studentin'), occupation_clean := 'Studentin']
xsection[ occupation %in% c('wie "was"? Erwerbsstatus? ','In Bezug worauf?','ein Einhorn', 'glücklich', 'Mensch'), occupation_clean := NA]
(common1 = count(as.data.frame(xsection), vars = occupation_clean))
## # A tibble: 7 x 2
## # ... with 7 more rows, and 2 variables: vars <chr>, n <int>
#Religion_Cleaning
xsection[ , religion_clean := religion]
xsection[ religion %in% c('katholisch','Evangelisch','evangelisch', 'katholisch '), religion_clean := 'Christentum']
xsection[ religion %in% c('Atheist','keine','Atheismus', 'nicht organisiert gläubig', 'nichts', 'gar keiner', 'keiner'), religion_clean := 'Nicht gläubig']
xsection[ religion %in% c('agnostisch','Agnostizismus','evangelisch - buddhistisch'), religion_clean := NA]
(common2 = count(as.data.frame(xsection), vars = religion_clean))
## # A tibble: 6 x 2
## # ... with 6 more rows, and 2 variables: vars <chr>, n <int>
#Beziehungsstatus_Cleaning
xsection[ , relationship_status_clean := relationship_status]
xsection[ relationship_status %in% c('feste Beziehung, es kriselt ein wenig','vergeben','in einer festen Partnerschaft', 'feste Partnerschaft', 'Partnerschaft (nicht verheiratet)', 'in einer Beziehung', 'feste Beziehung', 'Fernbeziehung', 'Beziehung', 'Freundin', 'offen', 'frisch verliebt', 'es ist kriselt'), relationship_status_clean := 'Partnerschaft']
xsection[ relationship_status %in% c('verheiratet'), relationship_status_clean := 'Verheiratet']
(common3 = count(as.data.frame(xsection), vars = relationship_status_clean))
## # A tibble: 3 x 2
## # ... with 3 more rows, and 2 variables: vars <chr>, n <int>
# Verhütungsmethode beim Kennenlernen_Cleaning
xsection[ , method_meeting_clean := method_meeting]
xsection[ method_meeting %in% c('pille', 'pille, wie bereits 30 mal gesagt'), method_meeting_clean := 'Pille']
xsection[ method_meeting %in% c('diese'), method_meeting_clean := NA]
(common4 = count(as.data.frame(xsection), vars = method_meeting_clean))
## # A tibble: 7 x 2
## # ... with 7 more rows, and 2 variables: vars <chr>, n <int>
xsection[age < 10+ duration_relationship_total/12, list(age, duration_relationship_total/12, duration_relationship_years, duration_relationship_months)]
xsection[age < 10+ duration_relationship_total/12, duration_relationship_total := NA]
qplot(as.numeric(xsection$ended.vorab - xsection$created.vorab),binwidth=1) + xlim(0,120)
qplot(as.numeric(xsection$ended.vorab - xsection$created.vorab)) + xlim(0,NA)
#Verhütung
count(as.data.frame(xsection), vars=contraception) %>% arrange(desc(n)) %>% head(15)
## # A tibble: 15 x 2
## # ... with 15 more rows, and 2 variables: vars <chr>, n <int>
contrabroad = count(as.data.frame(xsection), vars=contraception) %>% arrange(desc(n))
## Transformation of contraceptive methods into broader categories
xsection[ contraception == "none, contraceptive_pill, morning_after_pill, other_hormonal, infertile, partner_infertile, intrauterine_pessar, kalendermethode, coitus_interruptus, spermicide", contraception := NA] # this one simply clicked everything
xsection$contraceptives_broad_categories = xsection$contraception
xsection[contraceptives_broad_categories %in% contrabroad[contrabroad$n <= 10, ]$vars, contraceptives_broad_categories := "other"]
xsection[ contraception %contains% "temperature_billings", contraceptives_broad_categories := "counting_and_or_temperature_methods"]
xsection[ contraception %contains% "kalendermethode", contraceptives_broad_categories := "counting_and_or_temperature_methods"]
xsection[ contraception %contains% "computer", contraceptives_broad_categories := "counting_and_or_temperature_methods"]
xsection[ contraception %contains% "coitus_interruptus", contraceptives_broad_categories := "occasionally_use_coitus_interruptus"]
(contrabroad = count(as.data.frame(xsection), vars=contraceptives_broad_categories) %>% arrange(desc(n)))
## # A tibble: 11 x 2
## # ... with 11 more rows, and 2 variables: vars <chr>, n <int>
# Contraception Categories "barrier or abstinence" vs "fertility awareness" vs "hormonal"
xsection[, contraceptive_method := NA_character_]
xsection[ contraception %contains% "condoms" | contraception %contains% "intrauterine_pessar" | contraception %contains% "partner_sterilised"| contraception %contains% "partner_infertile" | contraception %contains% "coitus_interruptus" | (contraception == "none" & had_sex_with_partner_yet == "no"), contraceptive_method := "barrier_or_abstinence"]
xsection[ contraception %contains% "temperature_billings" | contraception %contains% "kalendermethode" | contraception %contains% "computer", contraceptive_method := "fertility_awareness"]
xsection[ contraception %contains% "contraceptive_pill" | contraception %contains% "other_hormonal", contraceptive_method := "hormonal"]
xsection[ contraception == "none", contraceptive_method := "none"]
xsection[ is.na(contraceptive_method), contraceptive_method := "other"]
xsection[, contraceptive_method_by_pearl := NA_character_]
xsection[ contraception %contains% "temperature_billings" | contraception %contains% "kalendermethode" | contraception %contains% "computer", contraceptive_method_by_pearl := "fertility_awareness"]
xsection[ contraception %contains% "condoms" | contraception %contains% "intrauterine_pessar" | contraception %contains% "partner_sterilised" | contraception %contains% "partner_infertile" | (contraception == "none" & had_sex_with_partner_yet == "no"), contraceptive_method_by_pearl := "barrier_or_abstinence"]
xsection[ is.na(contraceptive_method_by_pearl) & contraception %contains% "coitus_interruptus" , contraceptive_method_by_pearl := "barrier_or_abstinence"]
xsection[ contraception %contains% "contraceptive_pill" | contraception %contains% "other_hormonal", contraceptive_method_by_pearl := "hormonal"]
xsection[ contraception == "none", contraceptive_method_by_pearl := "none"]
xsection[ is.na(contraceptive_method_by_pearl), contraceptive_method_by_pearl := "other"]
(count(as.data.frame(xsection[contraceptive_method == "other", ]), vars=contraception) %>% arrange(desc(n)) %>% data.frame())
## vars n
## 1 andere 8
## 2 sterilised 7
## 3 2
## 4 infertile 2
## 5 none, andere 2
## 6 infertile, sterilised 1
## 7 morning_after_pill 1
## 8 none, infertile 1
## 9 none, morning_after_pill 1
## 10 none, sterilised 1
## 11 <NA> 1
(contramethod = count(as.data.frame(xsection), vars=contraceptive_method) %>% arrange(desc(n)) %>% data.frame())
## vars n
## 1 hormonal 693
## 2 barrier_or_abstinence 329
## 3 none 98
## 4 fertility_awareness 61
## 5 other 27
crosstabs(~ contraceptive_method, data = xsection)
## contraceptive_method
## barrier_or_abstinence fertility_awareness hormonal none other
## 329 61 693 98 27
xsection[contraceptive_method == "other",list(contraception, had_sex_with_partner_yet == "yes", age, occupation, wish_for_child==1, breast_feeding==1, pregnant==1)] %>% na.omit()%>% pander()
contraception | V2 | age | occupation | V5 | V6 | V7 |
---|---|---|---|---|---|---|
TRUE | 23 | Studentin | TRUE | FALSE | FALSE | |
andere | TRUE | 28 | Studentin | FALSE | FALSE | FALSE |
sterilised | TRUE | 36 | Berufstätig | FALSE | FALSE | FALSE |
andere | FALSE | 24 | Studentin | FALSE | FALSE | FALSE |
andere | TRUE | 23 | Studentin | FALSE | FALSE | FALSE |
andere | FALSE | 23 | Studentin | FALSE | FALSE | FALSE |
andere | TRUE | 58 | Berufstätig | FALSE | FALSE | FALSE |
sterilised | TRUE | 37 | Berufstätig | FALSE | FALSE | FALSE |
none, infertile | TRUE | 25 | Studentin | FALSE | FALSE | FALSE |
sterilised | TRUE | 48 | Hausfrau | FALSE | FALSE | FALSE |
andere | TRUE | 41 | Berufstätig | FALSE | FALSE | FALSE |
infertile | TRUE | 57 | Berufstätig | FALSE | FALSE | FALSE |
none, sterilised | TRUE | 53 | Berufstätig | FALSE | FALSE | FALSE |
sterilised | TRUE | 50 | Berufstätig | FALSE | FALSE | FALSE |
none, andere | TRUE | 26 | Studentin | FALSE | FALSE | FALSE |
andere | TRUE | 28 | Studentin | FALSE | TRUE | TRUE |
none, morning_after_pill | TRUE | 28 | Berufstätig | FALSE | FALSE | FALSE |
sterilised | TRUE | 53 | Berufstätig | FALSE | FALSE | FALSE |
andere | FALSE | 19 | Studentin | FALSE | FALSE | FALSE |
sterilised | TRUE | 49 | Berufstätig | FALSE | FALSE | FALSE |
none, andere | TRUE | 25 | Studentin | FALSE | FALSE | TRUE |
infertile, sterilised | TRUE | 49 | Berufstätig | FALSE | FALSE | FALSE |
infertile | TRUE | 46 | Berufstätig | FALSE | FALSE | FALSE |
morning_after_pill | FALSE | 21 | Studentin | FALSE | FALSE | FALSE |
sterilised | TRUE | 50 | Berufstätig | FALSE | FALSE | FALSE |
TRUE | 22 | Studentin | FALSE | FALSE | FALSE |
(contramethod = count(as.data.frame(xsection), vars = contraceptive_method) %>% arrange(desc(n)) %>% data.frame())
## vars n
## 1 hormonal 693
## 2 barrier_or_abstinence 329
## 3 none 98
## 4 fertility_awareness 61
## 5 other 27
##Transformation of contraceptive methods into natural and artifical categories
# Hier müssen wir nochmal schauen, in welcher Reihenfolge wir das machen wollen. Im Moment werden - glaube ich? - die letzten Befehle als wichtigstes Kriterium eingeschätzt.
xsection$contraceptives_categories_natural = xsection$contraception
xsection[ contraception == "", contraceptives_categories_natural := NA_character_]
xsection[ contraception %contains% "none", contraceptives_categories_natural := "none"]
xsection[ contraception %contains% "condoms", contraceptives_categories_natural := "artifical"]
xsection[ contraception %contains% "contraceptive_pill", contraceptives_categories_natural := "artifical"]
xsection[ contraception %contains% "other_hormonal", contraceptives_categories_natural := "artifical"]
xsection[ contraception %contains% "intrauterine_pessar", contraceptives_categories_natural := "artifical"]
xsection[ contraception %contains% "morning_after_pill", contraceptives_categories_natural := "artifical"]
xsection[ contraception %contains% "coitus_interruptus", contraceptives_categories_natural := "natural"]
xsection[ contraception %contains% "kalendermethode", contraceptives_categories_natural := "natural"]
xsection[ contraception %contains% "computer", contraceptives_categories_natural := "natural"]
xsection[ contraception %contains% "temperature_billings", contraceptives_categories_natural := "natural"]
xsection[ contraception %contains% "sterilised", contraceptives_categories_natural := NA_character_]
xsection[ contraception %contains% "andere", contraceptives_categories_natural := NA_character_]
xsection[ contraception %contains% "partner_sterilised", contraceptives_categories_natural := NA_character_]
(contrabroadnatural = count(as.data.frame(xsection), vars=contraceptives_categories_natural) %>% arrange(desc(n)))
## # A tibble: 6 x 2
## # ... with 6 more rows, and 2 variables: vars <chr>, n <int>
## Transformation of number of children (combined into: "children" and "no children")
xsection$children_broad_categories = xsection$children
xsection[ children == "", children_broad_categories := 'no_children']
xsection[ children %contains% 1, children_broad_categories := "children"]
xsection[ children %contains% 2, children_broad_categories := "children"]
xsection[ children %contains% 3, children_broad_categories := "children"]
xsection[ children %contains% 4, children_broad_categories := "children"]
xsection[ children %contains% 5, children_broad_categories := "no_children"]
summary(xsection$children_broad_categories)
## Length Class Mode
## 1208 character character
## Attractiveness_finance
xsection$attractiveness_finance = (xsection$attractiveness_finance_1 + xsection$attractiveness_finance_2)/2
give proper variable labels for plots etc.
xsection$breast_feeding_in_last_3_months = ifelse(xsection$breast_feeding == 2, "no", "yes")
xsection$pregnant_in_last_3_months = ifelse(xsection$pregnant == 2, "no", "yes")
xsection$hormonal_medication_in_last_3_months = ifelse(xsection$antibiotics == 2, "no", "yes")
xsection$pill_in_last_3_months = ifelse(xsection$pille_control == 2, "no", "yes")
childrenrecode = c("1" = "Biological children", "2" = "Adopted children", "3" = "Step children", "4" = "Foster children", "5" = "No children")
# xsection$children_categories = str_replace_all(xsection$children, childrenrecode)
xsection$children_narrow_categories = childrenrecode[xsection$children]
xsection$children_narrow_categories[is.na(xsection$children_narrow_categories)] = "Complex family"
xsection$children_narrow_categories[xsection$children == ""] = "No children"
incomebrackets = c("1" = "< 500€", "2" = "500-1000€", "3" = "1000-2000€", "4" = "2000-3000€", "5" = "> 3000€")
# xsection$children_categories = str_replace_all(xsection$children, childrenrecode)
xsection$income = recode_ordered(incomebrackets, xsection$attractiveness_finance_self)
xsection$income_partner = incomebrackets[xsection$attractiveness_finance_2]
cohabitation = c("1" = "Long-distance", "2" = "Live in same city", "3" = "Live in same apartment")
xsection$cohabitation = cohabitation[xsection$long_distance_relationship]
living_situation = c("1" = "living alone", "2" = "living in flatshare")
xsection$living_situation = living_situation[xsection$situation_of_living]
xsection$living_situation = ifelse(xsection$long_distance_relationship == 3, "living with partner", xsection$living_situation)
xsection$living_situation = ifelse(xsection$living_situation == 'living in flatshare', ifelse(xsection$flat_share == 1, 'living in all-female flatshare', 'living in mixed-sex flatshare'), xsection$living_situation)
days_with_partner_per_month = c("1" = '< 3 days', '2' = '3-5 days', '3' = '5-7 days', '4' = '7-14 days', '5' = '> 14 days')
xsection$days_with_partner_per_month = recode_ordered(days_with_partner_per_month, xsection$long_distance_relationship_2)
distance_to_partner_hours = c("1" = '< 1h', '2' = '1-2h', '3' = '2-3h', '4' = '3-5h', '5' = '5-9h', '6' = '9-12h', '7' = '>12h')
xsection$distance_to_partner_hours = recode_ordered(distance_to_partner_hours, xsection$distance_partner)
days_with_partner = c('1' = '< 3 days', '2' = '3-5 days', '3' = '7 days') # forgot 6...
nights_with_partner = c('1' = '< 3 nights', '2' = '3-5 nights', '3' = '7 nights')
xsection$days_with_partner = recode_ordered(days_with_partner, xsection$days_with_partner)
xsection$nights_with_partner = recode_ordered(nights_with_partner, xsection$nights_with_partner)
xsection$duration_relationship_years = xsection$duration_relationship_total/12
xsection$cycle_length = xsection$cyklus_3
xsection$cycle_length_groups = cut(xsection$cycle_length, breaks = c(19,25,30,35,41))
xsection$certainty_menstruation = factor(round(rowMeans(xsection[, list(cyklus_2, cyklus_4, cyklus_6)])))
xsection$cycle_regularity = factor(xsection$cyklus_5, levels = 1:3, labels = c("very regular,\nup to 2 days off", "slightly irregular,\nup to 5 days off", "irregular,\nmore than 5 days off"))
xsection$sexual_intercourse_partner = NULL
xsection$wish_for_child = NULL
prev_names = names(xsection)
stopifnot(sum(duplicated(xsection$session)) == 0)
diary = merge(xsection, tagebuch, by = "session", all.x = T)
diary = setDF(diary)
table(is.na(diary$created.vorab))
##
## FALSE
## 30956
# diary[is.na(created.vorab),list(session, choice_of_clothing_2, created.vorab, created.abschluss)]
crosstabs(as.Date(diary$created) == as.Date(diary$created - dhours(9))) # we can left shift dates by 7 hours, because even though were supposed to start filling out before midnight, bugs in our survey software sometimes led to the invitations being sent a little and rarely a lot late)
## as.Date(diary$created) == as.Date(diary$created - dhours(9))
## FALSE TRUE <NA>
## 188 30736 32
diary = diary %>% filter(!is.na(created.vorab))
diary$weekend = factor(weekdays( diary$created) %in% c("Friday","Saturday","Sunday"))
diary$weekday = factor(weekdays( diary$created) , levels = c("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"))
diary$first_day = ave(diary$created_date, diary$session, FUN = min)
diary$day_number = as.integer(round(as.numeric(diary$created_date - diary$first_day, units = "days")))
combos = diary %>% select(session, day_number)
combos$sess_day = paste(combos$session, combos$day_number)
table(duplicated(combos$sess_day))
##
## FALSE
## 30956
# head(combos)
diary$spent_night_with_partner = ifelse(diary$mate_retention_1 == 1, 1, 0)
diary$in_pair_public_intimacy = ifelse(diary$mate_retention_2 != 3, 1, 0)
diary2 = diary
To compute menstrual onsets from the diary data, we have to clear a few hurdles:
Therefore we use a multi-step procedure:
diary = diary2
diary = diary %>% setDF %>% group_by(session) %>%
mutate(
menstruation_dates = created_date, # get dates
menstruation_dates = if_else(menstruation_1 == 2 | is.na(menstruation_1), as.Date(NA), menstruation_dates), # set to missing one dates where no menstruation was reported
days = length(unique(created_date)),
day_count = seq_along(created),
first_diary_date = min(created_date),
last_diary_date = max(created_date),
timespan = as.numeric( last_diary_date - first_diary_date ),
# diary[, timespan := ave(x= created_num, session, FUN = function(x) { round((max(x,na.rm=T)-min(x,na.rm=T))/(24*3600)) } )]
days_responded_percentage = ifelse( timespan==0, NA_real_, days/timespan),
sufficient_diary_coverage = days >= 30 & days_responded_percentage > 0.5,
last_menstruation = if_else(all(is.na(menstruation_dates)), as.Date(NA), max(menstruation_dates, na.rm=T)),
last_lag = ifelse(is.na(last_menstruation), NA_real_, as.numeric(last_diary_date - last_menstruation)) # how many days after their last observed menstruation did they last fill out the diary
) %>% data.table()
crosstabs(diary$menstruation_2)
## diary$menstruation_2
## 1 2 3 4 5 6 <NA>
## 781 897 824 800 766 726 26162
diary[, days_since_menstrual_onset := ifelse(menstruation_2 == 6, NA_real_, menstruation_2 - 1 )] # the way we collected this variable, we have to set "more than 4 days ago" to missing, because it's unspecific
diary[, menstrual_onset_date_inferred := created_date - ddays(days_since_menstrual_onset)] ### count back from each current diary day, to create a menstrual onset date
crosstabs(is.na(diary$menstrual_onset_date_inferred))
## is.na(diary$menstrual_onset_date_inferred)
## FALSE TRUE
## 4068 26888
### define a variable containing the number of days since the last menstruation as counted on the current diary day
# step 1
menstrual_onsets = diary %>% setDF() %>%
group_by(session) %>%
arrange(created) %>%
select(session, created_date, menstrual_onset_date_inferred)
followup_survey = nachbe %>% setDF() %>%
mutate(
menstrual_onset_date_inferred = if_else(menstrual_bleeding_1 == 1, as.Date(created), as.Date(menstrual_bleeding_2)),
created_date = menstrual_onset_date_inferred) %>%
select(session, created_date, menstrual_onset_date_inferred)
pre_survey = vorab %>% setDF() %>%
mutate(menstrual_onset_date_inferred = as.Date(cyklus_1),
created_date = menstrual_onset_date_inferred) %>%
select(session, created_date, menstrual_onset_date_inferred)
mons = menstrual_onsets %>%
select(session, created_date, menstrual_onset_date_inferred) %>%
bind_rows(
pre_survey,
followup_survey
) %>%
filter( !is.na(menstrual_onset_date_inferred)) %>%
unique() %>%
group_by(session) %>%
arrange(session, created_date) %>%
# step 3: prefer reports closer to event if they conflict
mutate(
onset_diff = abs( as.double( lag(menstrual_onset_date_inferred) - menstrual_onset_date_inferred, units = "days")), # was there a change in the reference menstrual onset
onset_diff = if_else(onset_diff == 0, NA_real_, onset_diff, NA_real_), # we want to take the date that is freshest in memory. no switch is set to missing
menstrual_onset_date_inferred = if_else(is.na(lag(menstrual_onset_date_inferred)), # if last date not known
menstrual_onset_date_inferred, # use current date
if_else(onset_diff < 7, # if last date is known, but is slightly different from current date
as.Date(NA), # attribute it to memory, not extremely short cycle, use fresher date
menstrual_onset_date_inferred, # if it's a big difference, use the current date
as.Date(NA))), # if no date is assigned today, keep it like that
menstrual_onset_date_inferred = formr::repeat_last(menstrual_onset_date_inferred)) %>%
mutate(created_date = menstrual_onset_date_inferred) %>%
filter(!is.na(menstrual_onset_date_inferred)) %>%
select(-onset_diff) %>%
unique()
avg_cycle_lengths = mons %>%
select(session, menstrual_onset_date_inferred) %>%
unique() %>%
arrange(session, menstrual_onset_date_inferred) %>%
mutate(
number_of_cycles = n(),
cycle_nr = row_number(),
cycle_length_diary = as.double(lead(menstrual_onset_date_inferred) - menstrual_onset_date_inferred, units = "days"),
mean_cycle_length_diary = mean(cycle_length_diary, na.rm = TRUE),
median_cycle_length_diary = median(cycle_length_diary, na.rm = TRUE))
menstrual_onsets = menstrual_onsets %>% select(session, created_date) %>%
bind_rows(
diary = .,
demo = pre_survey %>% select(session, created_date),
followup = followup_survey %>% select(session, created_date),
.id = "date_origin") %>%
filter(!is.na(created_date)) %>%
unique() %>%
group_by(session) %>%
arrange(session, created_date) %>%
# step 2 expand into time-series for participant
complete(created_date = full_seq(created_date, period = 1)) %>%
left_join(mons, by = c("session", "created_date")) %>%
unique()
menstrual_onsets = menstrual_onsets %>%
group_by(session) %>%
mutate(
# carry the last observation (the last observed menstrual onset) backward/forward (within person).
# first we carry it backward (because reporting is retrospective)
next_menstrual_onset = formr::repeat_last(menstrual_onset_date_inferred, forward = FALSE),
# then we carry it forward
last_menstrual_onset = formr::repeat_last(menstrual_onset_date_inferred),
# calculate the diff to current date
menstrual_onset_days_until = as.numeric(created_date - next_menstrual_onset),
menstrual_onset_days_since = as.numeric(created_date - last_menstrual_onset)
)
# mons %>% filter(session %contains% "43a25b") %>% View()
# menstrual_onsets %>% filter(session %contains% "43a25b") %>% View()
# menstrual_onsets %>% filter(session %starts_with% "020f65ea45") %>% View()
diary = diary %>%
setDF() %>%
left_join(menstrual_onsets %>%
select(session, created_date, next_menstrual_onset, last_menstrual_onset, menstrual_onset_days_until, menstrual_onset_days_since) %>%
unique() # unique because some women reported the last menstrual date in the diary again in the followup
,
by = c("session", "created_date")
) %>%
left_join(avg_cycle_lengths %>% rename(last_menstrual_onset = menstrual_onset_date_inferred), by = c("session", "last_menstrual_onset")) %>%
mutate(
next_menstrual_onset_inferred = if_else(is.na(next_menstrual_onset),
last_menstrual_onset + days(cyklus_3),
next_menstrual_onset),
RCD_inferred = as.numeric(created_date - next_menstrual_onset_inferred)
) %>%
data.table()
diary[!duplicated(session) & median_cycle_length_diary <= 40 & median_cycle_length_diary >= 20, qplot(median_cycle_length_diary, cyklus_3) + geom_smooth()]
diary[!duplicated(session) & median_cycle_length_diary <= 40 & median_cycle_length_diary >= 20, lm(median_cycle_length_diary ~ cyklus_3)]
##
## Call:
## lm(formula = median_cycle_length_diary ~ cyklus_3)
##
## Coefficients:
## (Intercept) cyklus_3
## 20.681 0.287
# diary %>% filter(session %starts_with% "020f65ea45") %>% select(created_date, next_menstrual_onset, last_menstrual_onset, menstrual_onset_days_until, menstrual_onset_days_since) %>% View()
xtabs(~ is.na(menstrual_onset_days_until) + is.na(menstrual_onset_days_since), diary)
## is.na(menstrual_onset_days_since)
## is.na(menstrual_onset_days_until) FALSE TRUE
## FALSE 18614 0
## TRUE 12310 32
diary = diary %>%
mutate(
FCD = menstrual_onset_days_since + 1,
RCD = menstrual_onset_days_until,
RCD_for_merge = (RCD * -1) + 1,
RCD_squished = if_else(
cycle_length_diary - FCD < 14,
29 - (cycle_length_diary - FCD),
round((FCD/ (cycle_length_diary - 14) ) * 15)),
RCD_inferred = (RCD_inferred * -1) + 1,
# add 15 days to the reverse cycle days to arrive at the estimated day of ovulation
RCD_rel_to_ovulation = RCD + 15
)
crosstabs(diary$RCD_squished)
## diary$RCD_squished
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 74 426 444 382 437 458 467 310 547 488 534 509 641 574 663 643 606 612
## 18 19 20 21 22 23 24 25 26 27 28 29 <NA>
## 663 668 694 716 762 768 801 780 820 828 846 857 12938
crosstabs(diary$RCD_for_merge)
## diary$RCD_for_merge
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 934 857 846 828 820 780 801 768 759 715 693 667 661 609 601 582 563 542
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## 529 499 477 449 419 401 370 342 305 252 159 119 100 77 65 49 49 44
## 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## 37 34 24 33 28 31 32 25 27 24 17 19 14 21 20 18 12 16
## 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## 15 10 13 8 10 9 9 11 7 8 4 5 8 8 7 8 10 9
## 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## 9 7 9 8 7 5 9 4 5 3 4 2 2 6 6 5 3 2
## 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## 5 3 5 3 4 4 3 2 3 2 5 3 6 5 2 1 2 1
## 109 112 113 114 115 117 118 120 121 122 123 124 125 126 127 128 129 130
## 3 1 2 1 2 1 1 2 1 2 1 1 1 2 1 1 2 2
## 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 150 151 155
## 2 1 2 3 1 2 2 2 1 1 1 1 2 2 2 1 1 1
## 156 157 158 159 161 163 164 166 167 168 170 171 174 175 178 179 182 183
## 1 1 2 2 1 1 1 1 1 1 1 1 2 1 1 1 1 1
## 186 187 189 190 191 192 193 194 195 196 197 198 199 200 201 203 205 208
## 2 1 2 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1
## 209 210 211 218 224 227 242 248 249 250 251 252 253 265 282 300 301 302
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 315 317 318 319 323 344 347 348 349 350 351 352 353 354 355 356 357 358
## 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1
## 359 360 <NA>
## 1 1 12342
crosstabs(diary$FCD)
## diary$FCD
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 934 951 928 978 1003 951 938 978 957 960 939 939 936 948 923 899 933 883 896 924 882 872
## 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
## 861 849 812 743 723 644 415 271 215 195 172 150 142 140 116 101 97 90 92 85 91 82
## 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
## 91 82 77 84 80 70 64 58 63 65 56 54 38 45 41 36 47 49 75 77 74 73
## 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
## 63 67 63 65 51 51 58 50 49 56 49 45 52 43 44 38 42 36 36 40 34 33
## 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
## 35 32 28 31 31 32 30 26 27 21 26 26 21 21 14 19 13 8 8 10 8 9
## 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
## 4 6 4 5 5 4 6 10 6 6 5 2 1 2 6 4 4 2 7 3 5 3
## 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
## 8 2 3 2 4 2 4 3 6 3 3 2 4 5 2 2 2 1 1 2 2 2
## 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
## 2 2 4 5 2 1 3 4 3 4 1 3 1 2 3 2 1 2 1 4 2 2
## 178 179 181 183 186 188 192 195 196 197 198 200 201 204 206 207 208 209 211 214 216 218
## 2 1 1 1 1 2 2 2 1 3 1 1 2 1 1 1 1 1 3 2 2 3
## 220 221 222 223 225 229 233 234 235 236 239 240 242 245 247 250 251 252 257 258 260 261
## 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 3 1
## 262 263 264 265 267 268 271 272 275 280 281 283 284 285 292 294 299 300 301 303 304 310
## 1 1 1 1 3 1 2 2 1 2 2 2 1 1 1 2 1 1 2 2 1 1
## 311 312 313 318 321 322 326 329 330 331 332 333 334 336 337 338 342 351 375 396 397 398
## 1 2 1 2 1 1 1 1 2 1 1 1 1 1 1 2 1 2 1 1 1 1
## 404 432 440 483 615 <NA>
## 1 1 1 1 1 32
crosstabs(~RCD_squished+ RCD_for_merge, diary %>% filter(cycle_length_diary<40))
## RCD_for_merge
## RCD_squished 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## 1 255 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 2 21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 3 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 7
## 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4 7 19
## 5 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 9 18 31
## 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 4 6 35 146
## 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 20 48 55
## 8 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 16 27 78 184 62
## 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 6 8 91 205 47 25
## 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 4 17 116 216 56 25 19
## 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 5 59 250 64 28 4 0
## 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 82 339 62 17 0 0 0
## 13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 17 358 56 2 0 0 0 0
## 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 481 32 0 0 0 0 0 0
## 15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 533 0 0 0 0 0 0 0 0
## 16 5 0 0 0 0 0 0 0 0 0 0 0 0 0 542 0 0 0 0 0 0 0 0 0
## 17 3 0 0 0 0 0 0 0 0 0 0 0 0 555 0 0 0 0 0 0 0 0 0 0
## 18 2 0 0 0 0 0 0 0 0 0 0 0 599 0 0 0 0 0 0 0 0 0 0 0
## 19 1 0 0 0 0 0 0 0 0 0 0 608 0 0 0 0 0 0 0 0 0 0 0 0
## 20 1 0 0 0 0 0 0 0 0 0 629 0 0 0 0 0 0 0 0 0 0 0 0 0
## 21 1 0 0 0 0 0 0 0 0 644 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 22 3 0 0 0 0 0 0 0 681 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 23 0 0 0 0 0 0 0 685 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 24 0 0 0 0 0 0 720 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 25 0 0 0 0 0 703 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 26 0 0 0 0 742 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 27 0 0 0 749 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 28 0 0 772 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 29 0 772 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## RCD_for_merge
## RCD_squished 25 26 27 28 29 30 31 32 33 34 35 36 37 38
## 1 0 0 0 0 0 0 0 0 0 0 6 4 5 1
## 2 0 29 36 79 41 20 25 20 13 13 6 8 2 0
## 3 16 39 83 40 19 28 10 6 11 10 6 0 0 0
## 4 30 92 48 40 31 19 10 9 6 1 0 0 0 0
## 5 133 46 48 10 17 11 12 8 1 0 0 0 0 0
## 6 56 46 23 25 11 6 5 0 0 0 0 0 0 0
## 7 48 25 16 13 6 1 0 0 0 0 0 0 0 0
## 8 29 27 17 5 0 0 0 0 0 0 0 0 0 0
## 9 21 4 0 0 0 0 0 0 0 0 0 0 0 0
## 10 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 13 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 15 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 18 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 20 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 21 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 22 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 23 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 24 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 25 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 26 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 27 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 29 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ggplot(diary %>% filter(cycle_length_diary<40), aes(FCD, RCD_squished)) + geom_point() + facet_wrap(~cycle_length_diary)
days = data.frame(
RCD_for_merge = c(29:1, 30:41),
FCD = c(1:41),
prc_stirn_b = c(.01, .01, .02, .03, .05, .09, .16, .27, .38, .48, .56, .58, .55, .48, .38, .28, .20, .14, .10, .07, .06, .04, .03, .02, .01, .01, .01, .01, .01, rep(.01, times = 12)),
# rep(.01, times = 70)), # gangestad uses .01 here, but I think such cases are better thrown than kept, since we might simply have missed a mens
prc_wcx_b = c(.000, .000, .001, .002, .004, .009, .018, .032, .050, .069, .085, .094, .093, .085, .073, .059, .047, .036, .028, .021, .016, .013, .010, .008, .007, .006, .005, .005, .005, rep(.005, times = 12))
)
# rep(NA_real_, times = 70)) # gangestad uses .005 here, but I think such cases are better thrown than kept, since we might simply have missed a mens
days = days %>% mutate(
fertile_narrow = ifelse(between(RCD_for_merge,15,19), mean(prc_stirn_b[between(RCD_for_merge,15, 19)], na.rm = T),
ifelse(between(RCD_for_merge,4,12), mean(prc_stirn_b[between(RCD_for_merge,4,12)], na.rm = T), NA_real_)), # these days are likely infertile
fertile_broad = ifelse(between(RCD_for_merge,14,22), mean(prc_stirn_b[between(RCD_for_merge,14,22)], na.rm = T),
ifelse(between(RCD_for_merge,4,12), mean(prc_stirn_b[between(RCD_for_merge,4,12)], na.rm = T), NA_real_)), # these days are likely infertile
fertile_window = factor(ifelse(fertile_broad, ifelse(!is.na(fertile_narrow), "narrow", "broad"),"infertile"), levels = c("infertile","broad", "narrow")),
premenstrual_phase = ifelse(between(RCD_for_merge, 2, 7), TRUE, FALSE)
)
rcd_days = days %>% select(-FCD)
diary = left_join(diary %>% setDF(), rcd_days, by = "RCD_for_merge")
fcd_days = days %>% select(-RCD_for_merge)
names(fcd_days) = paste0(names(fcd_days), "_forward_counted")
fcd_days = fcd_days %>% rename(FCD = FCD_forward_counted)
diary = left_join(diary, fcd_days, by = "FCD")
rcd_inferred_days = days %>% select(-FCD)
names(rcd_inferred_days) = paste0(names(rcd_inferred_days), "_backward_inferred")
rcd_inferred_days = rcd_inferred_days %>%
rename(RCD_inferred = RCD_for_merge_backward_inferred)
diary = left_join(diary, rcd_inferred_days, by = "RCD_inferred")
rcd_squished = days %>% select(-RCD_for_merge)
names(rcd_squished) = paste0(names(rcd_squished), "_squished")
rcd_squished = rcd_squished %>%
rename(RCD_squished = FCD_squished)
diary = left_join(diary, rcd_squished, by = "RCD_squished")
ggplot(diary %>% filter(cycle_length_diary<40), aes(FCD, prc_stirn_b_squished))+geom_point() + facet_wrap(~cycle_length_diary )
ggplot(diary %>% filter(cycle_length_diary<40), aes(RCD, prc_stirn_b_squished))+geom_point() + facet_wrap(~cycle_length_diary )
ggplot(diary %>% filter(cycle_length_diary<40) %>% select(FCD,RCD,prc_stirn_b,prc_stirn_b_squished,cycle_length_diary) %>% gather(approach,prc_stirn_b,-RCD,-FCD, -cycle_length_diary), aes(RCD, prc_stirn_b, colour = approach))+geom_point() + facet_wrap(~cycle_length_diary ) + geom_vline(aes(xintercept = cycle_length_diary/2 * -1))
diary %>% select(starts_with("prc_stirn_b")) %>%
cor(use='pairwise.complete.obs')
## prc_stirn_b prc_stirn_b_forward_counted prc_stirn_b_backward_inferred
## prc_stirn_b 1.0000 0.6846 1.0000
## prc_stirn_b_forward_counted 0.6846 1.0000 0.7153
## prc_stirn_b_backward_inferred 1.0000 0.7153 1.0000
## prc_stirn_b_squished 0.8578 0.6695 0.8578
## prc_stirn_b_squished
## prc_stirn_b 0.8578
## prc_stirn_b_forward_counted 0.6695
## prc_stirn_b_backward_inferred 0.8578
## prc_stirn_b_squished 1.0000
pander(missingness_patterns(diary %>% ungroup %>% select(prc_stirn_b, prc_stirn_b_forward_counted, prc_stirn_b_backward_inferred)))
index col missings 1 prc_stirn_b 13069 2 prc_stirn_b_backward_inferred 3813 3 prc_stirn_b_forward_counted 3576
Pattern | Freq | Culprit |
---|---|---|
_____ | 16782 | _ |
1____ | 9256 | prc_stirn_b |
1_2_3 | 2471 | |
1_2__ | 1342 | |
____3 | 1105 | prc_stirn_b_forward_counted |
diary = diary %>%
mutate(fertile_fab = if_else(is.na(prc_stirn_b), prc_stirn_b_backward_inferred, prc_stirn_b),
premenstrual_phase_fab = if_else(is.na(premenstrual_phase), premenstrual_phase_backward_inferred, premenstrual_phase),
fertile_forward_and_backward = fertile_fab
) %>%
data.table()
diary[, menstruated_at_all := menstruation_1 == 1]
diary[ is.na(menstruated_at_all), menstruated_at_all := F]
table(diary$menstruated_at_all, exclude = NULL)
##
## FALSE TRUE
## 26151 4805
diary[, menstruation := factor(ifelse(menstruated_at_all,"yes", ifelse(premenstrual_phase, "pre","no")), levels = c("no", "pre", "yes"))]
diary[is.na(menstruation), menstruation := "no"]
# diary %>% select(short, created_date, ended, last_menstrual_onset,next_menstrual_onset, menstrual_onset_days_until, menstrual_onset_days_since, prc_stirn_b, prc_stirn_b_forward_counted, fertile_fab) %>% View()
diary[, ever_menstruated := ave(menstruated_at_all, session, FUN = any)]
diary[, any_RCD := !ave(is.na(RCD), session, FUN = all)]
diary[, menstruation_strength := menstruation_3]
diary[ is.na(menstruation_strength), menstruation_strength := 0]
diary[, menstruation_strength := factor(menstruation_strength)]
diary[, menstrual_onset := created_date == next_menstrual_onset]
diary[, we_know_fertile_days := ave(!is.na(prc_stirn_b) | !is.na(prc_stirn_b_forward_counted), session, FUN = any)]
crosstabs(diary[!duplicated(session),]$we_know_fertile_days)
## diary[!duplicated(session), ]$we_know_fertile_days
## FALSE TRUE
## 96 1112
diary$fertile_cont = diary$prc_stirn_b
diary %>% filter(sufficient_diary_coverage==T & ever_menstruated == T & any_RCD == T) %>%
ggplot(aes(x = menstrual_onset_days_until, y = as.factor(as.numeric(as.factor(session)))))+
scale_y_discrete("Person") +
geom_tile(aes(fill = fertile_cont)) +
scale_x_continuous("Relative to last observed menstrual onset", limits= c(-40, 15), breaks = seq(-40,15, by = 5)) +
geom_point(aes(colour=menstruation_strength)) +
scale_fill_distiller() +
scale_color_manual(values = c("0"="transparent","1"="pink","2"="red","3"="darkred")) +
geom_vline(aes(xintercept = limits, linetype = Window),data = data.frame(limits= c(-14.5,-19.5, -13.5,-22.5,-3.5,-12.5), Window = rep(c("narrow","broad","infertile"),each=2)),color = 'black', size = 0.9, alpha = 0.9,show.legend = T) +
scale_linetype_manual(values = c("narrow"="solid","broad"="dashed","infertile"="dotted"))
# a case where the nachbe helped
# diary[session %contains% "eaca39", list(created_date, menstruation_dates, next_menstrual_onset, last_menstrual_onset, menstruated_at_all, menstruation_2,RCD, fertile_cont,fertile_broad) ] %>% arrange(created_date) %>% View()
# a case where we observed a menstruation at the end and in the beginning, but missed the onsets
# diary[session %contains% "b270e1", list(created_date, menstruation_dates, next_menstrual_onset, last_menstrual_onset, menstruated_at_all, menstruation_2,RCD, fertile_cont,fertile_broad) ] %>% arrange(created_date) %>% View()
# a case where we observed a menstruation at the end and in the beginning
# diary[session %contains% "0b8734", list(created_date, menstruation_dates, next_menstrual_onset, last_menstrual_onset, menstruated_at_all, menstruation_2,RCD, fertile_cont,fertile_broad) ] %>% arrange(created_date) %>% View()
# a case where she first indicated menstrual onset on the day it occurred (likely correct), then two days later reported that it occurred yesterday (likely incorrect). problem solved by not treating extremely close together menstrual onsets as separate, instead using the earlier date (closer to onset, more likely to be correct)
# diary[session %contains% "c7cc8e", list(created_date, menstruation_dates, next_menstrual_onset, last_menstrual_onset, menstruated_at_all, menstruation_2,RCD, fertile_cont,fertile_broad) ] %>% arrange(created_date) %>% View()
## two cases with a little misreporting of ongoing periods
# diary[session %contains% "43a25b", list(created_date, menstruation_dates, next_menstrual_onset, last_menstrual_onset, menstruated_at_all, menstruation_2,RCD, fertile_cont,fertile_broad) ] %>% arrange(created_date) %>% head(60) %>% View()
# diary[session %contains% "8d4629", list(created_date, menstruation_dates, next_menstrual_onset, last_menstrual_onset, menstruated_at_all, menstruation_2,RCD, fertile_cont,fertile_broad) ] %>% arrange(created_date) %>% head(60) %>% View()
qplot(data = diary[!duplicated(session) & between(median_cycle_length_diary, 20,40), ], median_cycle_length_diary, cyklus_3, colour = factor(cyklus_4)) + geom_smooth(method = 'lm')
cor.test(diary[!duplicated(session) & between(median_cycle_length_diary, 20,40) & cyklus_4 > 4, ]$median_cycle_length_diary, diary[!duplicated(session) & between(median_cycle_length_diary, 20,40) & cyklus_4 > 4, ]$cyklus_3)
##
## Pearson's product-moment correlation
##
## data: diary[!duplicated(session) & between(median_cycle_length_diary, and diary[!duplicated(session) & between(median_cycle_length_diary, 20, 40) & cyklus_4 > 4, ]$median_cycle_length_diary and 20, 40) & cyklus_4 > 4, ]$cyklus_3
## t = 4.5, df = 250, p-value = 0.00001
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1537 0.3815
## sample estimates:
## cor
## 0.2714
diary$week_number = cut(diary$day_number, c(0,7,14,21,28,35,600), include.lowest = T)
diary = diary %>%
group_by(session, cycle_nr) %>%
mutate(minimum_cycle_length_diary = if_else(!is.na(cycle_length_diary), cycle_length_diary,
max(FCD,na.rm = T)),
minimum_cycle_length_diary = if_else(minimum_cycle_length_diary == -Inf, NA_real_, minimum_cycle_length_diary)
) %>%
group_by(session) %>%
data.table()
pipeline$completed_pre_survey = nrow(xsection)
some preparations:
library(dplyr)
min_max_diff = function(x) {as.numeric(max(as.Date(x),na.rm=T) - min(as.Date(x)),na.rm=T) }
diary_summaries = diary %>%
group_by(session) %>%
summarise(took_days = min_max_diff(created),
n_days = sum(!is.na(ended) & !is.na(communication_partner_1)), # look at first item too, because some had ended due to being "closed" after some time of non-response, a software thing
avg_diff = mean(as.numeric(diff(created), units = "days"),na.rm = T),
biggest_diff = max(as.numeric(diff(created), units = "days"),na.rm = T),
had_any_period = any(menstruation_1 == 1, na.rm = T),
fertile_days_known_backward = sum(!is.na(ended) & !is.na(communication_partner_1) & !is.na(prc_stirn_b)),
fertile_days_known_backward_inferred = sum(!is.na(ended) & !is.na(communication_partner_1) & !is.na(prc_stirn_b_backward_inferred)),
fertile_days_known_forward = sum(!is.na(ended) & !is.na(communication_partner_1) & !is.na(prc_stirn_b_forward_counted))
)
diary_summaries[, any_fertile_days_known := fertile_days_known_backward_inferred > 0]
crosstabs(~ any_fertile_days_known + had_any_period, diary_summaries)
## had_any_period
## any_fertile_days_known FALSE TRUE
## FALSE 124 6
## TRUE 174 904
crosstabs(~ had_any_period + (n_days > 20), data = diary_summaries)
## n_days > 20
## had_any_period FALSE TRUE
## FALSE 229 69
## TRUE 163 747
diary_summaries[any_fertile_days_known == T, had_any_period := T]
xsection = merge(xsection, diary_summaries, by = "session", all.x = T) %>% data.table()
crosstabs(~ age + had_any_period, data = xsection[n_days > 35, ])
## had_any_period
## age FALSE TRUE
## 18 1 10
## 19 0 28
## 20 0 41
## 21 0 32
## 22 2 27
## 23 2 34
## 24 1 14
## 25 0 17
## 26 0 15
## 27 0 9
## 28 0 7
## 29 0 7
## 30 0 5
## 31 0 5
## 32 0 7
## 33 1 2
## 34 0 4
## 35 0 3
## 36 0 2
## 37 0 3
## 38 0 1
## 39 0 3
## 40 0 3
## 41 0 3
## 42 0 2
## 43 0 1
## 45 0 3
## 46 0 2
## 47 0 3
## 48 0 2
## 49 2 2
## 50 1 3
## 51 0 1
## 53 1 0
## 55 0 1
## 58 0 1
## 60 1 0
crosstabs(~ age + any_fertile_days_known, data = xsection[n_days > 35, ])
## any_fertile_days_known
## age FALSE TRUE
## 18 1 10
## 19 0 28
## 20 0 41
## 21 0 32
## 22 2 27
## 23 2 34
## 24 1 14
## 25 0 17
## 26 0 15
## 27 0 9
## 28 0 7
## 29 0 7
## 30 0 5
## 31 0 5
## 32 0 7
## 33 1 2
## 34 0 4
## 35 0 3
## 36 0 2
## 37 0 3
## 38 0 1
## 39 0 3
## 40 0 3
## 41 0 3
## 42 0 2
## 43 0 1
## 45 0 3
## 46 0 2
## 47 0 3
## 48 0 2
## 49 2 2
## 50 1 3
## 51 0 1
## 53 1 0
## 55 0 1
## 58 0 1
## 60 1 0
crosstabs(~ age + (fertile_days_known_backward > 0), data = xsection[n_days > 35, ])
## fertile_days_known_backward > 0
## age FALSE TRUE
## 18 1 10
## 19 1 27
## 20 3 38
## 21 1 31
## 22 3 26
## 23 4 32
## 24 1 14
## 25 0 17
## 26 1 14
## 27 0 9
## 28 0 7
## 29 0 7
## 30 1 4
## 31 0 5
## 32 0 7
## 33 1 2
## 34 0 4
## 35 0 3
## 36 0 2
## 37 0 3
## 38 1 0
## 39 0 3
## 40 0 3
## 41 0 3
## 42 0 2
## 43 1 0
## 45 0 3
## 46 1 1
## 47 1 2
## 48 0 2
## 49 2 2
## 50 3 1
## 51 0 1
## 53 1 0
## 55 1 0
## 58 1 0
## 60 1 0
crosstabs(~ age + (fertile_days_known_forward > 0), data = xsection)
## fertile_days_known_forward > 0
## age FALSE TRUE
## 18 7 29
## 19 6 68
## 20 8 85
## 21 8 104
## 22 9 96
## 23 11 118
## 24 7 101
## 25 3 73
## 26 5 80
## 27 5 47
## 28 7 39
## 29 3 32
## 30 5 24
## 31 0 16
## 32 0 20
## 33 4 23
## 34 1 11
## 35 0 11
## 36 0 13
## 37 1 9
## 38 0 9
## 39 1 12
## 40 1 9
## 41 1 8
## 42 0 3
## 43 0 3
## 44 0 7
## 45 2 7
## 46 0 3
## 47 1 7
## 48 1 3
## 49 3 6
## 50 3 8
## 51 1 3
## 52 2 1
## 53 3 2
## 54 1 0
## 55 1 1
## 57 0 1
## 58 1 1
## 60 2 0
## 69 0 1
unique(xsection[n_days > 0, list(age, n_days, fertile_days_known_forward, fertile_days_known_backward, had_any_period,any_fertile_days_known)]) %>% arrange(age) %>% tail(40)
unique(xsection[n_days >= 30, list(age, n_days, fertile_days_known_forward, fertile_days_known_backward, had_any_period,any_fertile_days_known)]) %>% arrange(age) %>% tail(40)
xsection[age > 45 & !had_any_period, any_fertile_days_known := FALSE]
unique(xsection[n_days >= 30 & any_fertile_days_known == T, list(age, n_days, fertile_days_known_forward, fertile_days_known_backward, had_any_period,any_fertile_days_known)]) %>% arrange(age) %>% tail(40)
excluded_old = NULL
xsection[, include_all := T] # default is to include everybody
xsection[ include_all == T, include_all := !is.na(n_days) & n_days > 0] # ever responded to diary
pipeline$never_did_diary = n_excluded(xsection$include_all)
## 54 excluded
xsection[str_sub(session, 1,10) == "cd78621db4", include_all := F] # reported hormonal treatment in preparation for IVF
xsection[ include_all == T, include_all := ! pregnant %contains% "1"] # only non-pregnant (in last 3 months)
pipeline$pregnant = n_excluded(xsection$include_all)
## 24 excluded
xsection[ include_all == T, include_all := ! contraception %contains_word% "infertile"] # only not infertile
pipeline$infertile = n_excluded(xsection$include_all)
## 5 excluded
xsection[ include_all == T, include_all := ! contraception %contains_word% "sterilised"] # only non-pregnant (in last 3 months)
pipeline$sterilised = n_excluded(xsection$include_all)
## 12 excluded
xsection[ include_all == T, include_all := any_fertile_days_known] # only those where we could infer fertility on at least one day
pipeline$fertility_not_estimable = n_excluded(xsection$include_all)
## 59 excluded
# here we allow those who breastfed in last 3 months, because those we excluded those with lactational amenorrhoe already above
# xsection[ include_all == T, include_all := ! breast_feeding %contains% "1"] # only non-breast_feeding
# n_excluded(xsection$include_all)
xsection[, hormonal_all := include_all]
pipeline$naturally_cycling = sum(xsection[ include_all == T,
! contraception %contains% "contraceptive_pill" &
!contraception %contains% "other_hormonal"], na.rm = T)
xsection[ include_all == T, include_all :=
! contraception %contains% "contraceptive_pill" &
!contraception %contains% "other_hormonal"] # only non-hormonally contracepting, in this case we leave women who take the morning after pill with the naturally cycling women
pipeline_hc$HC_users = sum(xsection[ hormonal_all == T,
contraception %contains% "contraceptive_pill" |
contraception %contains% "other_hormonal"], na.rm = T)
xsection[ hormonal_all == T, hormonal_all :=
contraception %contains% "contraceptive_pill" |
contraception %contains% "other_hormonal"] # only hormonally contracepting
xsection[, included_bool := include_all & !hormonal_all]
xsection[hormonal_all == F & include_all == F, included_bool := NA]
xsection[, included_all := factor(ifelse(included_bool, "cycling", "horm_contra"))]
crosstabs(~ included_all, xsection)
## included_all
## cycling horm_contra
## 429 625
429 non-hormonally contracepting women and 625 hormonally contracepting women are included in the sample with only mandatory exclusion criteria.
excluded_old = NULL
xsection[, include_lax := include_all] # default is to include everybody
n_excluded(xsection$include_lax)
## 779 excluded
## [1] 779
xsection[ include_lax == T, include_lax := age < 40] # include only those who are younger than 40
pipeline$older_than_40 = n_excluded(xsection$include_lax)
## 54 excluded
xsection[ include_lax == T, include_lax := ! is.na (ended.abschluss)] # include only those who finished the abschluss survey
pipeline$finished_post_survey = n_excluded(xsection$include_lax)
## 137 excluded
xsection[ include_lax == T, include_lax := ! contraception %contains% "contraceptive_pill" & ! contraception %contains% "morning_after_pill" & !contraception %contains% "other_hormonal" & !contraception %contains% "infertile"] # only non-hormonally contracepting
pipeline$hc_users_broad = n_excluded(xsection$include_lax)
## 2 excluded
xsection[ include_lax == T, include_lax := ! breast_feeding %contains% "1"] # only non-breast_feeding
pipeline$breast_feeding = n_excluded(xsection$include_lax)
## 6 excluded
xsection[ include_lax == T, include_lax := !pille_control %contains% "1"] # pill in the last 3 months
pipeline$pill_last_3m = n_excluded(xsection$include_lax)
## 14 excluded
xsection[ include_lax == T, include_lax := !antibiotics %contains% "1"] # no hormonal medication
pipeline$hormonal_medicine = n_excluded(xsection$include_lax)
## 25 excluded
table(xsection$cyklus_3) #show cycle lengths
##
## 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 13 22 7 23 28 87 66 57 504 84 150 29 31 13 9 36 2 2 2 1 42
xsection[, hormonal_contra := contraception %contains% "contraceptive_pill" & ! contraception %contains% "morning_after_pill" & !contraception %contains% "other_hormonal" & !contraception %contains% "infertile"]
crosstabs(~ xsection$hormonal_contra + xsection$cyklus_4)
## xsection$cyklus_4
## xsection$hormonal_contra 1 2 3 4 5
## FALSE 62 66 119 169 189
## TRUE 33 46 95 194 234
## <NA> 0 0 0 1 0
xtabs(~ cyklus_3 + cyklus_4, data = xsection)
## cyklus_4
## cyklus_3 1 2 3 4 5
## 20 4 0 1 4 4
## 21 1 3 4 7 7
## 22 2 1 1 2 1
## 23 1 1 10 7 4
## 24 2 0 3 14 9
## 25 16 13 17 26 15
## 26 4 4 11 33 14
## 27 4 9 12 16 16
## 28 17 28 70 141 248
## 29 1 5 15 34 29
## 30 17 26 39 42 26
## 31 3 0 4 12 10
## 32 3 7 6 5 10
## 33 0 3 3 4 3
## 34 0 1 2 3 3
## 35 4 4 9 7 12
## 36 1 0 0 0 1
## 37 0 0 1 0 1
## 38 1 0 0 1 0
## 39 0 0 1 0 0
## 40 14 7 5 6 10
xsection[ include_lax == T, include_lax := cyklus_3 > 21 & cyklus_3 < 38] #exclude those with wrong cycle lengths
pipeline$long_or_short_cycle = n_excluded(xsection$include_lax)
## 10 excluded
crosstabs(~ include_lax + hypothesis_guessed, data = xsection)
## hypothesis_guessed
## include_lax hormones menstruation pille zyklus zyklus, pille
## FALSE 986 6 1 3 29 1
## TRUE 157 0 0 3 21 0
## <NA> 1 0 0 0 0 0
xsection[ include_lax == T, include_lax := is.na(hypothesis_guessed) | ! hypothesis_guessed %contains% "zyklus"] #exclude those who guessed hypotheses
pipeline$guessed_hypothesis = n_excluded(xsection$include_lax)
## 21 excluded
xsection[ include_lax == T, include_lax := !(took_days > 70 | avg_diff > 10)] #exclude laggards
pipeline$laggards = n_excluded(xsection$include_lax)
## 17 excluded
pipeline$criterion_lax = sum(xsection$include_lax, na.rm = T)
143 non-hormonally contracepting women and 0 hormonally contracepting women are included in the sample with lax inclusion criteria.
excluded_old = NULL
xsection[, hormonal_lax := hormonal_all] # default is to include everybody
n_excluded(xsection$hormonal_lax)
## 583 excluded
## [1] 583
xsection[ hormonal_lax == T, hormonal_lax := age < 40] # include only those below 40
pipeline_hc$older_than_40 = n_excluded(xsection$hormonal_lax)
## 10 excluded
xsection[ hormonal_lax == T, hormonal_lax := ! is.na (ended.abschluss)] # include only those who finished the abschluss survey
pipeline_hc$finished_post_survey = n_excluded(xsection$hormonal_lax)
## 168 excluded
xsection[ hormonal_lax == T, hormonal_lax := ! breast_feeding %contains% "1"] # only non-breast_feeding
pipeline_hc$breast_feeding = n_excluded(xsection$hormonal_lax)
## 2 excluded
xsection[ hormonal_lax == T, hormonal_lax := cyklus_3 > 21 & cyklus_3 < 38] #exclude those with wrong cycle lengths
pipeline_hc$long_or_short_cycle = n_excluded(xsection$hormonal_lax)
## 28 excluded
xsection[ hormonal_lax == T, hormonal_lax := ! hypothesis_guessed %contains% "zyklus" & ! hypothesis_guessed %contains% "hormones"] #exclude those who guessed hypotheses
pipeline_hc$guessed_hypothesis = n_excluded(xsection$hormonal_lax)
## 19 excluded
xsection[ hormonal_lax == T, hormonal_lax := !(took_days > 70 | avg_diff > 10)] #exclude laggards
pipeline_hc$laggards = n_excluded(xsection$hormonal_lax)
## 24 excluded
xsection[, included_bool := include_lax & !hormonal_lax]
xsection[hormonal_lax == F & include_lax == F, included_bool := NA]
xsection[, included_lax := factor(ifelse(included_bool, "cycling", "horm_contra"))]
is.na(xsection$included_lax)[is.na(xsection$included_bool)] <- TRUE
crosstabs( ~factor(xsection$included_lax,exclude = NULL))
## factor(xsection$included_lax, exclude = NULL)
## cycling horm_contra <NA>
## 143 374 691
pipeline_hc$criterion_lax = sum(xsection$hormonal_lax, na.rm = T)
excluded_old = NULL
xsection[, include_conservative := include_lax]
n_excluded(xsection$include_conservative)
## 1065 excluded
## [1] 1065
xsection[include_conservative == T, include_conservative := ! cigarettes > 10]
pipeline$heavy_smokers = n_excluded(xsection$include_conservative)
## 4 excluded
xsection[include_conservative == T, include_conservative := ! intensive_sports > 7]
pipeline$intensive_sports = n_excluded(xsection$include_conservative)
## 6 excluded
xsection[include_conservative == T, include_conservative := ! breakup == 1]
pipeline$broke_up = n_excluded(xsection$include_conservative)
## 3 excluded
xsection[include_conservative == T, include_conservative := ! change_contraception_1 %contains% "1"]
pipeline$changed_contraception = n_excluded(xsection$include_conservative)
## 0 excluded
xsection[include_conservative == T, include_conservative := weight_post - weight > -8]
pipeline$lost_more_than_8kg = n_excluded(xsection$include_conservative)
## 2 excluded
xsection[, BMI := ((weight_post + weight)/2) /((height/100)^2)] #creating new variable "BMI" in dataset
qplot(xsection$BMI)
xsection[include_conservative == T, include_conservative := BMI > 17]
pipeline$BMI_lt_17 = n_excluded(xsection$include_conservative)
## 3 excluded
xsection[include_conservative == T, include_conservative := BMI < 30]
pipeline$BMI_gt_30 = n_excluded(xsection$include_conservative)
## 8 excluded
table(xsection$cyklus_5) #show if and how much cycle was off during the last three months
##
## 1 2 3
## 779 237 192
xtabs(~ cyklus_5 + cyklus_6, data = xsection)
## cyklus_6
## cyklus_5 1 2 3 4 5
## 1 18 10 23 128 600
## 2 9 15 34 72 107
## 3 22 4 12 31 123
xsection[include_conservative == T, include_conservative := cyklus_5 < 3 | cyklus_6 == 5] #exclude those with irregular cycles, IF they have expressed some certainty in this
pipeline$confident_that_cycle_irregular = n_excluded(xsection$include_conservative)
## 5 excluded
pipeline$criterion_conservative = sum(xsection$include_conservative, na.rm = T)
excluded_old = NULL
xsection[, hormonal_conservative := hormonal_lax]
n_excluded(xsection$hormonal_conservative)
## 834 excluded
## [1] 834
xsection[hormonal_conservative == T, hormonal_conservative := ! cigarettes > 10]
pipeline_hc$heavy_smokers = n_excluded(xsection$hormonal_conservative)
## 10 excluded
xsection[hormonal_conservative == T, hormonal_conservative := ! intensive_sports > 7]
pipeline_hc$intensive_sports = n_excluded(xsection$hormonal_conservative)
## 18 excluded
xsection[hormonal_conservative == T, hormonal_conservative := ! breakup == 1]
pipeline_hc$broke_up = n_excluded(xsection$hormonal_conservative)
## 10 excluded
xsection[hormonal_conservative == T, hormonal_conservative := ! change_contraception_1 %contains% "1"]
pipeline_hc$changed_contraception = n_excluded(xsection$hormonal_conservative)
## 14 excluded
xsection[hormonal_conservative == T, hormonal_conservative := weight_post - weight > -8]
pipeline_hc$lost_more_than_8kg = n_excluded(xsection$hormonal_conservative)
## 6 excluded
xsection[, BMI := ((weight_post + weight)/2) /((height/100)^2)] #creating new variable "BMI" in dataset
qplot(xsection$BMI)
xsection[hormonal_conservative == T, hormonal_conservative := BMI > 17]
pipeline_hc$BMI_lt_17 = n_excluded(xsection$hormonal_conservative)
## 7 excluded
xsection[hormonal_conservative == T, hormonal_conservative := BMI < 30]
pipeline_hc$BMI_gt_30 = n_excluded(xsection$hormonal_conservative)
## 10 excluded
table(xsection$cyklus_5) #show if and how much cycle was off during the last three months
##
## 1 2 3
## 779 237 192
xtabs(~ cyklus_5 + cyklus_6, data = xsection)
## cyklus_6
## cyklus_5 1 2 3 4 5
## 1 18 10 23 128 600
## 2 9 15 34 72 107
## 3 22 4 12 31 123
xsection[hormonal_conservative == T, hormonal_conservative := cyklus_5 < 3 | cyklus_6 == 5] #exclude those with irregular cycles, IF they have expressed some certainty in this
pipeline_hc$confident_that_cycle_irregular = n_excluded(xsection$hormonal_conservative)
## 6 excluded
xsection[, included_bool := include_conservative & ! hormonal_conservative]
xsection[ hormonal_conservative == F & include_conservative == F, included_bool := NA]
xsection[, included_conservative := factor(ifelse(included_bool, "cycling", "horm_contra"))]
is.na(xsection$included_conservative)[is.na(xsection$included_bool)] <- TRUE
crosstabs( ~factor(xsection$included_conservative,exclude = NULL))
## factor(xsection$included_conservative, exclude = NULL)
## cycling horm_contra <NA>
## 112 294 802
pipeline_hc$criterion_conservative = sum(xsection$hormonal_conservative, na.rm = T)
112 non-hormonally contracepting women and 293 hormonally contracepting women are included in the sample with conservative exclusion criteria.
excluded_old = NULL
xsection[, include_strict := include_conservative]
n_excluded(xsection$include_strict)
## 1096 excluded
## [1] 1096
xsection[include_strict == T, include_strict := cyklus_5 < 3] #exclude those with irregular cycles, even the uncertain ones
pipeline$cycle_possibly_irregular = n_excluded(xsection$include_strict)
## 12 excluded
xsection[include_strict == T, include_strict := ! stress == 1]
pipeline$stressed = n_excluded(xsection$include_strict)
## 43 excluded
pipeline$criterion_strict = sum(xsection$include_strict, na.rm = T)
excluded_old = NULL
xsection[, hormonal_strict := hormonal_conservative]
n_excluded(xsection$hormonal_strict)
## 915 excluded
## [1] 915
xsection[hormonal_strict == T, hormonal_strict := cyklus_5 < 3] #exclude those with irregular cycles, even the uncertain ones
pipeline_hc$cycle_possibly_irregular = n_excluded(xsection$hormonal_strict)
## 12 excluded
xsection[hormonal_strict == T, hormonal_strict := ! stress == 1]
pipeline_hc$stressed = n_excluded(xsection$hormonal_strict)
## 120 excluded
pipeline_hc$criterion_strict = sum(xsection$hormonal_strict, na.rm = T)
xsection[, included_bool := include_strict & ! hormonal_strict]
xsection[ hormonal_strict == F & include_strict == F, included_bool := NA]
xsection[, included_strict := factor(ifelse(included_bool, "cycling", "horm_contra"))]
is.na(xsection$included_strict)[is.na(xsection$included_bool)] <- TRUE
crosstabs( ~factor(xsection$included_strict,exclude = NULL))
## factor(xsection$included_strict, exclude = NULL)
## cycling horm_contra <NA>
## 57 162 989
crosstabs(~ factor(included_strict, exclude = NULL), xsection[!is.na(included_strict), ])
## factor(included_strict, exclude = NULL)
## cycling horm_contra
## 57 162
57 non-hormonally contracepting women and 161 hormonally contracepting women are included in the sample with overly strict exclusion criteria.
crosstabs(~ factor(included_strict, exclude = NULL), xsection[!is.na(included_strict), ])
## factor(included_strict, exclude = NULL)
## cycling horm_contra
## 57 162
xsection$included_levels = ifelse(!is.na(xsection$included_all),"all", NA_character_)
xsection$included_levels = ifelse(!is.na(xsection$included_lax), "lax", xsection$included_levels)
xsection$included_levels = ifelse(!is.na(xsection$included_conservative), "conservative", xsection$included_levels)
xsection$included_levels = ifelse(!is.na(xsection$included_strict), "strict", xsection$included_levels)
xsection$included_levels = factor(xsection$included_levels, levels = c("all", "lax", "conservative", "strict"))
xsection[, hormonal := contraception %contains% "contraceptive_pill" | contraception %contains% "other_hormonal"]
crosstabs(~ factor(xsection$included_levels, exclude = NULL))
## factor(xsection$included_levels, exclude = NULL)
## all lax conservative strict <NA>
## 537 111 187 219 154
crosstabs(~ factor(included_levels, exclude = NULL) + hormonal, data = xsection)
## hormonal
## factor(included_levels, exclude = NULL) FALSE TRUE <NA>
## all 286 251 0
## lax 31 80 0
## conservative 55 132 0
## strict 57 162 0
## <NA> 85 68 1
See descriptives.
library(DiagrammeR)
pipeline$hc = pipeline_hc
save(pipeline, file = "pipeline.rdata")
Save the data.
new_names = c("session", setdiff(names(xsection), prev_names))
diary = merge(xsection %>% select(one_of(new_names)), diary, by = "session", all.x = T)
save( xsection, diary, file = "full_data.rdata")