library(knitr)
opts_chunk$set(fig.width = 12, fig.height = 12, cache = F, warning = F, message = F)
source("0_helpers.R")
load("full_data.rdata")
diary = diary %>%
mutate(
included = included_all,
fertile = if_else(is.na(prc_stirn_b_squished), prc_stirn_b_backward_inferred, prc_stirn_b_squished),
contraceptive_methods = factor(contraceptive_method, levels =
c("barrier_or_abstinence", "fertility_awareness", "none", "hormonal")),
relationship_status_clean = factor(relationship_status_clean),
cohabitation = factor(cohabitation),
certainty_menstruation = as.numeric(as.character(certainty_menstruation))
) %>% group_by(person) %>%
mutate(
fertile_mean = mean(fertile, na.rm = T)
)
These fields were stored as text, but can be simplified.
diary = diary %>% mutate(
want_more_info = as.integer(want_more_info),
trying_to_get_pregnant = if_else(trying_to_get_pregnant == 'yes', 1, 0),
had_sex_with_partner_yet = if_else(had_sex_with_partner_yet == 'yes', 1, 0),
breast_feeding_in_last_3_months = if_else(breast_feeding_in_last_3_months == 'yes', 1, 0),
hormonal_medication_in_last_3_months = if_else(hormonal_medication_in_last_3_months == 'yes', 1, 0),
pill_in_last_3_months = if_else(pill_in_last_3_months == 'yes', 1, 0)
)
Free text fields often have unique values and can contain things like IDs, addresses, free-text responses to questions, etc.
# first I look only at character columns (they tend to have most unique values)
diary %>% select_if(is.character) %>%
gather(variable, value) %>% # putting this in long format allows me to easily
group_by(variable) %>%
summarise(n = n_nonmissing(value), n_dist = n_distinct(value)) %>% # summarise frequencies of values
pander() # and display them
variable | n | n_dist |
---|---|---|
change_contraception_2 | 1057 | 25 |
children | 30956 | 10 |
children_broad_categories | 30956 | 2 |
children_narrow_categories | 30956 | 5 |
contraception | 30952 | 73 |
contraceptive_method | 30956 | 5 |
contraceptive_method_by_pearl | 30956 | 5 |
contraceptives_broad_categories | 30952 | 11 |
contraceptives_categories_natural | 29911 | 6 |
feedback_for_us | 6114 | 176 |
gestagen_type | 16178 | 12 |
hormonal_2 | 1716 | 9 |
hypothesis_guessed | 30956 | 6 |
illness_2 | 9284 | 167 |
income_partner | 30956 | 5 |
living_situation | 30956 | 4 |
meaning_study | 14437 | 414 |
medicament_2 | 11349 | 278 |
method_meeting | 16257 | 10 |
method_meeting_clean | 16222 | 7 |
name_hormonal | 195 | 7 |
occupation | 30956 | 14 |
occupation_clean | 30850 | 7 |
other_pill_name | 1531 | 43 |
pills | 16212 | 72 |
postal_code | 30956 | 403 |
pregnant_in_last_3_months | 30956 | 2 |
relationship_status | 30956 | 15 |
religion | 30956 | 18 |
religion_clean | 30881 | 6 |
sess_day | 30924 | 30925 |
session | 30956 | 1208 |
session_id | 30924 | 30925 |
session_id.abschluss | 26913 | 785 |
session_id.vorab | 30956 | 1208 |
sex_orientation | 30956 | 5 |
short | 30956 | 1208 |
short_session | 30924 | 1177 |
special_days | 16985 | 403 |
special_events | 2836 | 2010 |
# of course manual inspection (for me, using View() ) is important too
# same thing for factors shows no rare unique values
diary %>% select_if(is.factor) %>%
gather(variable, value) %>%
group_by(variable) %>%
summarise(n = n_nonmissing(value), n_dist = n_distinct(value)) %>%
arrange(n_dist) %>%
pander()
variable | n | n_dist |
---|---|---|
weekend | 30956 | 2 |
cohabitation | 30956 | 3 |
cycle_regularity | 30956 | 3 |
days_with_partner | 30956 | 3 |
fertile_window | 11682 | 3 |
fertile_window_backward_inferred | 17013 | 3 |
fertile_window_forward_counted | 16201 | 3 |
fertile_window_squished | 11877 | 3 |
included | 28493 | 3 |
included_all | 28493 | 3 |
included_conservative | 14147 | 3 |
included_lax | 17960 | 3 |
included_strict | 7682 | 3 |
menstruation | 30956 | 3 |
nights_with_partner | 30956 | 3 |
relationship_status_clean | 30956 | 3 |
cycle_length_groups | 30956 | 4 |
estrogen_categories | 26785 | 4 |
menstruation_strength | 30956 | 4 |
contraceptive_methods | 30357 | 5 |
included_levels | 28493 | 5 |
age_group | 30956 | 6 |
days_with_partner_per_month | 9538 | 6 |
income | 13229 | 6 |
week_number | 30924 | 7 |
distance_to_partner_hours | 9538 | 8 |
weekday | 30924 | 8 |
gestagens | 1497 | 10 |
# diary %>% select_if(is.character) %>% names() %>% cat(sep=", -")
# here I remove all columns that have text fields which might betray someone's identity
diary = diary %>% select(-session, -session_id, -session_id.vorab, -short, -sess_day, -session_id.abschluss, -short_session, -vpn, -session_id.nachbe_other_hormonal,
-pills,-other_pill_name, -religion, -relationship_status, -postal_code, -children, -method_meeting, -meaning_study, -medicament_2, -special_days, -illness_2, -change_contraception_2, -feedback_for_us, -name_hormonal, -hypothesis_guessed, -children_narrow_categories, -special_events)
diary %>% select_if(is.instant) %>%
gather(variable, value) %>%
group_by(variable) %>%
summarise(n = n_nonmissing(value), n_dist = n_distinct(value)) %>%
arrange(n_dist) %>%
pander()
variable | n | n_dist |
---|---|---|
ended.nachbe_other_hormonal | 1716 | 52 |
created.nachbe_other_hormonal | 1804 | 55 |
modified.nachbe_other_hormonal | 1804 | 55 |
first_day | 30924 | 209 |
first_diary_date | 30924 | 209 |
cyklus_1 | 30956 | 306 |
last_menstruation | 27362 | 308 |
next_menstrual_onset | 18614 | 330 |
menstrual_onset_date_inferred | 4068 | 363 |
last_diary_date | 30924 | 371 |
menstruation_dates | 4805 | 437 |
next_menstrual_onset_inferred | 30924 | 450 |
last_menstrual_onset | 30924 | 453 |
created_date | 30924 | 532 |
ended.abschluss | 26432 | 770 |
created.abschluss | 26913 | 785 |
modified.abschluss | 26913 | 785 |
created.vorab | 30956 | 1206 |
modified.vorab | 30956 | 1206 |
ended.vorab | 30956 | 1207 |
ended | 30924 | 30582 |
modified | 30924 | 30795 |
created | 30924 | 30797 |
diary = diary %>%
# remove all datetime we don't need
select(-starts_with("created")) %>%
select(-starts_with("modified")) %>%
select(-starts_with("expired")) %>%
select(-first_day, -first_diary_date, -cyklus_1, -last_menstruation, -next_menstrual_onset, -menstrual_onset_date_inferred, -last_diary_date, -menstruation_dates, -next_menstrual_onset_inferred, -last_menstrual_onset) %>%
mutate( # we do want to know whether they finished a survey, but can't know when
ended = !is.na(ended),
ended.vorab = !is.na(ended.vorab),
ended.abschluss = !is.na(ended.abschluss),
ended.nachbe_other_hormonal = !is.na(ended.nachbe_other_hormonal)
)
a similar problem exists for intervals, but we might need these and I can easily round them. Here I unfortunately didn’t store them as type interval, so I have to look through all numeric types
# diary %>% select_if(is.interval) %>%
# gather(variable, value) %>%
# group_by(variable) %>%
# summarise(n = n_nonmissing(value), n_dist = n_distinct(value)) %>%
# arrange(n_dist) %>%
# pander()
The Hmisc::cut2
function allows me to group variables into g
groups and try to make no group contain fewer than m
values.
diary = diary %>%
mutate(
time_for_response = Hmisc::cut2(time_for_response, g = 10, m = 30),
time_of_response = Hmisc::cut2(time_of_response, g = 10, m = 30),
time_since_last_response = Hmisc::cut2(time_since_last_response, cuts = 10, m = 30),
biggest_diff = Hmisc::cut2(biggest_diff, g = 10, m = 30),
avg_diff = Hmisc::cut2(avg_diff, g = 10, m = 300),
days_responded_percentage = Hmisc::cut2(days_responded_percentage, g = 10, m = 300)
)
Although rare values won’t allow anyone to identify many participants, it is important to protect privacy for all participants. In our case especially, a potential attacker might know that e.g. his girlfriend uses a pill with a rare gestagen and identify her like this.
To this end, I wrote the function below. It gives (depending on the last argument) the rarest value and how frequent it is. I included the group variable, because a lot of demographic data is duplicated in my diary data.
commonness_rarest_value = function(x, group, give_value = FALSE) {
df = data.frame(x = x, group = group) %>% unique()
counts = table(df$x)
ret = counts[which.min(counts)]
if (give_value) names(ret) else ret
}
diary %>% gather(variable, value, -person) %>%
group_by(variable) %>%
summarise(n = n_nonmissing(value), n_dist = n_distinct(value), lc_val = commonness_rarest_value(value, person, T), lc_freq = commonness_rarest_value(value, person, F)) %>%
arrange(desc(lc_freq/(n_dist+lc_freq))) %>%
pander()
variable | n | n_dist | lc_val | lc_freq |
---|---|---|---|---|
ended.vorab | 30956 | 1 | TRUE | 1208 |
gender | 30956 | 1 | 1 | 1208 |
hetero_relationship | 30956 | 1 | 1 | 1208 |
weekend | 30956 | 2 | TRUE | 1122 |
time_since_last_response | 23275 | 2 | [10,31] | 1097 |
menstruated_at_all | 30956 | 2 | TRUE | 910 |
in_pair_public_intimacy | 29902 | 3 | 0 | 1081 |
mate_retention_1 | 29902 | 3 | 2 | 960 |
spent_night_with_partner | 29902 | 3 | 0 | 960 |
fertile_broad_backward_inferred | 17013 | 3 | 0.0533333333333333 | 959 |
had_sexual_intercourse | 29902 | 3 | 1 | 945 |
premenstrual_phase_backward_inferred | 27143 | 3 | TRUE | 929 |
premenstrual_phase_fab | 27143 | 3 | TRUE | 929 |
fertile_broad_forward_counted | 16201 | 3 | 0.0533333333333333 | 928 |
fertile_window_forward_counted | 16201 | 3 | broad | 919 |
fertile_window_backward_inferred | 17013 | 3 | broad | 915 |
menstruation_1 | 29901 | 3 | 1 | 910 |
fertile_narrow_forward_counted | 12407 | 3 | 0.51 | 908 |
fertile_narrow_backward_inferred | 13345 | 3 | 0.51 | 905 |
had_petting | 29901 | 3 | 1 | 904 |
pill_contraception | 30956 | 2 | 0 | 598 |
premenstrual_phase_forward_counted | 27380 | 3 | TRUE | 873 |
contraception_meeting_partner | 30956 | 2 | 2 | 581 |
pill_in_last_3_months | 30956 | 2 | 0 | 566 |
pille_control | 30956 | 2 | 2 | 566 |
menstruation | 30956 | 3 | pre | 819 |
premenstrual_phase | 17887 | 3 | TRUE | 819 |
premenstrual_phase_squished | 17944 | 3 | TRUE | 807 |
sufficient_diary_coverage | 30956 | 2 | FALSE | 518 |
partner_initiated_sexual_intercourse | 6512 | 3 | 0 | 772 |
sexual_intercourse_3 | 6512 | 3 | 1 | 772 |
hormonal_contraception | 30956 | 2 | 0 | 514 |
menstrual_onset | 18614 | 3 | TRUE | 726 |
fertile_broad_squished | 11877 | 3 | 0.44 | 697 |
fertile_broad | 11682 | 3 | 0.44 | 694 |
fertile_window_squished | 11877 | 3 | broad | 683 |
fertile_window | 11682 | 3 | broad | 675 |
fertile_narrow | 9648 | 3 | 0.51 | 670 |
fertile_narrow_squished | 9702 | 3 | 0.51 | 667 |
ended.abschluss | 30956 | 2 | FALSE | 439 |
hormonal_contra | 30952 | 3 | TRUE | 602 |
hormonal_all | 30952 | 3 | FALSE | 585 |
mate_retention_2 | 29902 | 4 | 1 | 758 |
dodgy_data | 30924 | 3 | TRUE | 566 |
any_RCD | 30956 | 2 | FALSE | 368 |
SJS_5 | 30956 | 2 | 2 | 357 |
SJS_5R | 30956 | 2 | 1 | 357 |
hormonal | 30952 | 3 | FALSE | 514 |
SJS_6 | 30956 | 2 | 2 | 312 |
ever_menstruated | 30956 | 2 | FALSE | 298 |
had_any_period | 30956 | 2 | FALSE | 298 |
sexual_intercourse_2 | 29902 | 4 | 2 | 595 |
SJS_4 | 30956 | 2 | 2 | 286 |
sexual_intercourse_5 | 29901 | 4 | 2 | 567 |
include_all | 30952 | 3 | TRUE | 421 |
included | 28493 | 3 | cycling | 421 |
included_all | 28493 | 3 | cycling | 421 |
antibiotics | 30956 | 2 | 1 | 279 |
hormonal_medication_in_last_3_months | 30956 | 2 | 1 | 279 |
weekday | 30924 | 8 | Saturday | 1034 |
hormonal_lax | 30952 | 3 | TRUE | 374 |
week_number | 30924 | 7 | (28,35] | 833 |
stress | 26606 | 3 | 1 | 355 |
days_with_partner | 30956 | 3 | 3-5 days | 346 |
cohabitation | 30956 | 3 | Live in same city | 334 |
long_distance_relationship | 30956 | 3 | 2 | 334 |
sexual_intercourse_1 | 29902 | 6 | 1 | 663 |
sexual_intercourse_1_6scale | 29902 | 6 | 1.2 | 663 |
medicament_1 | 26606 | 3 | 1 | 330 |
nights_with_partner | 30956 | 3 | 3-5 nights | 329 |
attention_2 | 29874 | 7 | 6 | 766 |
SJS_3 | 30956 | 2 | 2 | 213 |
SJS_3R | 30956 | 2 | 1 | 213 |
male_attention_1 | 29874 | 7 | 2 | 739 |
situation_of_living | 18107 | 3 | 1 | 294 |
hormonal_conservative | 30940 | 3 | TRUE | 293 |
menstruation_3 | 4794 | 4 | 3 | 390 |
menstruation_strength | 30956 | 4 | 3 | 390 |
days_since_menstrual_onset | 4068 | 6 | 4 | 571 |
SJS_1 | 30956 | 2 | 2 | 188 |
illness_1 | 26606 | 3 | 1 | 271 |
children_broad_categories | 30956 | 2 | children | 180 |
mate_retention_3 | 29876 | 7 | 6 | 594 |
desirability_1 | 29881 | 7 | 1 | 562 |
choice_of_clothing_3 | 29887 | 7 | 1 | 554 |
communication_partner_1 | 29902 | 4 | 1 | 311 |
choice_of_clothing_6 | 29883 | 7 | 6 | 541 |
desirability_partner | 29879 | 7 | 1 | 537 |
time_of_response | 30924 | 11 | [17.0,17.3) | 835 |
any_fertile_days_known | 30956 | 2 | FALSE | 147 |
attention_1 | 29874 | 7 | 2 | 510 |
jealousy_1 | 29877 | 7 | 6 | 463 |
extra_pair_3 | 29873 | 7 | 6 | 451 |
cycle_regularity | 30956 | 3 | irregular, more than 5 days off | 192 |
cyklus_5 | 30956 | 3 | 3 | 192 |
extra_pair_6 | 29873 | 7 | 2 | 445 |
male_mate_retention_1 | 29876 | 7 | 6 | 444 |
choice_of_clothing_1 | 29896 | 7 | 6 | 431 |
mate_retention_4 | 29876 | 7 | 2 | 427 |
male_mate_retention_2 | 29876 | 7 | 2 | 418 |
mate_retention_6 | 29875 | 7 | 6 | 417 |
flat_share | 10737 | 3 | 1 | 173 |
extra_pair_12 | 29873 | 7 | 6 | 389 |
hormonal_strict | 30940 | 3 | TRUE | 161 |
SJS_2 | 30956 | 2 | 2 | 107 |
SJS_2R | 30956 | 2 | 1 | 107 |
menstruation_2 | 4794 | 7 | 6 | 368 |
extra_pair_5 | 29873 | 7 | 2 | 348 |
choice_of_clothing_4 | 29884 | 7 | 6 | 343 |
time_for_response | 29143 | 11 | [0.10, 2.15) | 531 |
extra_pair_2 | 29874 | 7 | 6 | 336 |
we_know_fertile_days | 30956 | 2 | FALSE | 96 |
include_lax | 30952 | 3 | TRUE | 143 |
included_lax | 17960 | 3 | cycling | 143 |
self_esteem_1 | 29881 | 7 | 1 | 309 |
BFI_consc_9 | 30956 | 5 | 3 | 219 |
BFI_consc_9R | 30956 | 5 | 3 | 219 |
living_situation | 30956 | 4 | living in all-female flatshare | 173 |
choice_of_clothing_8 | 29882 | 7 | 6 | 291 |
showy_clothes | 29882 | 7 | 6 | 291 |
extra_pair_1 | 29867 | 3 | 1 | 124 |
extra_pair_intimacy | 29867 | 3 | 1 | 124 |
NARQ_admiration_3 | 29877 | 7 | 6 | 289 |
estrogen_categories | 26785 | 4 | (300,600] | 153 |
mate_retention_5 | 29876 | 7 | 6 | 265 |
include_conservative | 30952 | 3 | TRUE | 112 |
included_conservative | 14147 | 3 | cycling | 112 |
SOI_R_6 | 30956 | 5 | 2 | 183 |
SOI_R_6R | 30956 | 5 | 4 | 183 |
prc_stirn_b_forward_counted | 27380 | 20 | 0.14 | 687 |
fertile_fab | 27143 | 20 | 0.05 | 679 |
fertile_forward_and_backward | 27143 | 20 | 0.05 | 679 |
prc_stirn_b_backward_inferred | 27143 | 20 | 0.05 | 679 |
SOI_R_5 | 30956 | 5 | 3 | 169 |
extra_pair_13 | 29873 | 7 | 6 | 234 |
extra_pair_sexual_fantasies | 29873 | 7 | 6 | 234 |
choice_of_clothing_2 | 29892 | 7 | 1 | 233 |
MV_2 | 30956 | 5 | 1 | 162 |
attention | 29874 | 12 | 1.5 | 382 |
choice_of_clothing_7 | 29881 | 7 | 6 | 222 |
fertile | 27828 | 20 | 0.16 | 619 |
attractiveness_finance_2 | 30956 | 5 | 5 | 152 |
income_partner | 30956 | 5 | > 3000€ | 152 |
NARQ_admiration_2 | 29877 | 7 | 6 | 211 |
SOI_R_4 | 30956 | 5 | 1 | 148 |
NARQ_admiration_1 | 29877 | 7 | 6 | 197 |
BFI_open_10 | 30956 | 5 | 1 | 136 |
choice_of_clothing_5 | 29885 | 7 | 6 | 186 |
extra_pair_7 | 29873 | 7 | 6 | 186 |
had_sex_with_partner_yet | 30956 | 2 | 0 | 53 |
male_mate_retention | 29876 | 12 | 6 | 314 |
relationship_satisfaction_1 | 29902 | 8 | 4.5 | 208 |
extra_pair_9 | 29873 | 7 | 6 | 179 |
ended.nachbe_other_hormonal | 30956 | 2 | TRUE | 51 |
extra_pair_10 | 29873 | 7 | 6 | 178 |
BFI_agree_6 | 30956 | 5 | 1 | 126 |
BFI_agree_6R | 30956 | 5 | 5 | 126 |
BFI_extra_7 | 30956 | 5 | 5 | 126 |
BFI_extra_7R | 30956 | 5 | 1 | 126 |
trying_to_get_pregnant | 30956 | 2 | 1 | 50 |
prc_wcx_b_forward_counted | 27380 | 26 | 0.006 | 624 |
CJS_6 | 30956 | 5 | 5 | 119 |
prc_wcx_b_backward_inferred | 27143 | 26 | 0.001 | 603 |
BFI_agree_4 | 30956 | 5 | 1 | 113 |
NARQ_1 | 30956 | 5 | 5 | 112 |
BFI_consc_4 | 30956 | 5 | 1 | 111 |
BFI_consc_4R | 30956 | 5 | 5 | 111 |
included_levels | 28493 | 5 | lax | 111 |
SGSE_5 | 30956 | 5 | 5 | 111 |
BFI_neuro_7 | 30956 | 5 | 5 | 108 |
RJS_1 | 30956 | 5 | 5 | 108 |
CJS_3 | 30956 | 5 | 1 | 106 |
MV_P_2 | 30956 | 5 | 1 | 105 |
SGSE_4 | 30956 | 5 | 5 | 104 |
SGSE_4R | 30956 | 5 | 1 | 104 |
extra_pair_going_out | 29873 | 12 | 5.5 | 242 |
male_jealousy_1 | 29876 | 7 | 6 | 141 |
male_jealousy_2 | 29877 | 7 | 6 | 138 |
BFI_neuro_8 | 30956 | 5 | 1 | 98 |
NARQ_6 | 30956 | 5 | 5 | 98 |
SOI_R_3 | 30956 | 5 | 5 | 97 |
cyklus_4 | 30956 | 5 | 1 | 95 |
include_strict | 30952 | 3 | TRUE | 57 |
included_bool | 7682 | 3 | TRUE | 57 |
included_strict | 7682 | 3 | cycling | 57 |
extra_pair_1b | 399 | 3 | 1 | 54 |
extra_pair_sex | 30924 | 3 | 1 | 54 |
fertile_cont | 17887 | 20 | 0.05 | 360 |
prc_stirn_b | 17887 | 20 | 0.05 | 360 |
extra_pair_8 | 29873 | 7 | 6 | 125 |
RJS_3 | 30956 | 5 | 5 | 89 |
BFI_neuro_6 | 30956 | 5 | 1 | 87 |
BFI_neuro_6R | 30956 | 5 | 5 | 87 |
relationship_status_clean | 30956 | 3 | Verlobt | 52 |
BFI_agree_1 | 30956 | 5 | 5 | 86 |
BFI_agree_1R | 30956 | 5 | 1 | 86 |
BFI_consc_8 | 30956 | 5 | 1 | 85 |
BFI_consc_8R | 30956 | 5 | 5 | 85 |
BFI_neuro_5 | 30956 | 5 | 5 | 85 |
BFI_neuro_5R | 30956 | 5 | 1 | 85 |
BFI_extra_2 | 30956 | 5 | 1 | 84 |
BFI_extra_2R | 30956 | 5 | 5 | 84 |
MV_P_5 | 30956 | 5 | 1 | 84 |
MV_P_5R | 30956 | 5 | 5 | 84 |
extra_pair_4 | 29873 | 7 | 6 | 116 |
BFI_agree_8 | 30956 | 5 | 1 | 82 |
BFI_agree_8R | 30956 | 5 | 5 | 82 |
BFI_neuro_3 | 30956 | 5 | 1 | 81 |
SGSE_1 | 30956 | 5 | 5 | 81 |
ended | 30956 | 2 | FALSE | 32 |
attractiveness_stp_self | 30956 | 5 | 1 | 79 |
CJS_2 | 30956 | 5 | 5 | 79 |
attractiveness_finance_1 | 30956 | 5 | 5 | 78 |
extra_pair_compliments | 29873 | 12 | 6 | 187 |
male_jealousy_3 | 29876 | 7 | 6 | 109 |
SGSE_3 | 30956 | 5 | 5 | 76 |
SGSE_3R | 30956 | 5 | 1 | 76 |
attractiveness_stp | 30956 | 5 | 1 | 75 |
NARQ_7 | 30956 | 5 | 1 | 75 |
extra_pair_11 | 29873 | 7 | 6 | 102 |
NARQ_rivalry_1 | 29876 | 7 | 6 | 100 |
MV_5 | 30956 | 5 | 1 | 71 |
MV_5R | 30956 | 5 | 5 | 71 |
NARQ_3 | 30956 | 5 | 5 | 71 |
SOI_R_7 | 30956 | 5 | 5 | 70 |
NARQ_rivalry_2 | 29877 | 7 | 6 | 97 |
BFI_neuro_2 | 30956 | 5 | 1 | 69 |
BFI_neuro_2R | 30956 | 5 | 5 | 69 |
prc_stirn_b_squished | 17944 | 20 | 0.16 | 271 |
has_not_had_sex_yet | 30956 | 2 | 1 | 27 |
BFI_neuro_1 | 30956 | 5 | 5 | 67 |
cyklus_2 | 30956 | 5 | 2 | 66 |
pregnant | 30956 | 2 | 1 | 26 |
pregnant_in_last_3_months | 30956 | 2 | yes | 26 |
NARQ_15 | 30956 | 5 | 5 | 64 |
NARQ_16 | 30956 | 5 | 5 | 62 |
cycle_length_groups | 30956 | 4 | (35,41] | 49 |
BFI_extra_8 | 30956 | 5 | 1 | 61 |
contraceptive_methods | 30357 | 5 | fertility_awareness | 61 |
SOI_R_2 | 30956 | 5 | 5 | 60 |
RJS_5 | 30956 | 5 | 5 | 59 |
BFI_open_7 | 30956 | 5 | 1 | 58 |
BFI_open_7R | 30956 | 5 | 5 | 58 |
RJS_6 | 30956 | 5 | 5 | 58 |
BFI_open_5 | 30956 | 5 | 1 | 56 |
ZIP_7 | 30956 | 5 | 1 | 56 |
ZIP_7R | 30956 | 5 | 5 | 56 |
NARQ_11 | 30956 | 5 | 1 | 55 |
want_more_info | 26606 | 3 | 0 | 33 |
prc_wcx_b | 17887 | 26 | 0 | 277 |
prc_wcx_b_squished | 17944 | 26 | 0.018 | 271 |
change_contraception_1 | 26606 | 3 | 1 | 31 |
BFI_open_9 | 30956 | 5 | 1 | 51 |
BFI_open_9R | 30956 | 5 | 5 | 51 |
CJS_1 | 30956 | 5 | 5 | 51 |
attractiveness_relativ_1 | 30956 | 4 | 4 | 40 |
breast_feeding | 30956 | 2 | 1 | 20 |
breast_feeding_in_last_3_months | 30956 | 2 | 1 | 20 |
SGSE_2 | 30956 | 5 | 5 | 49 |
BFI_open_1 | 30956 | 5 | 1 | 47 |
age_group | 30956 | 6 | (45,70] | 56 |
BFI_extra_6 | 30956 | 5 | 1 | 45 |
BFI_open_6 | 30956 | 5 | 1 | 44 |
biggest_diff | 30956 | 10 | [ 2.04, 2.13) | 88 |
NARQ_18 | 30956 | 5 | 1 | 44 |
NARQ_9 | 30956 | 5 | 5 | 44 |
BFI_consc_2 | 30956 | 5 | 1 | 43 |
BFI_consc_2R | 30956 | 5 | 5 | 43 |
NARQ_5 | 30956 | 5 | 1 | 43 |
MV_P_1 | 30956 | 5 | 5 | 42 |
attractiveness_finance_self | 13229 | 6 | 5 | 49 |
income | 13229 | 6 | > 3000€ | 49 |
MV_4 | 30956 | 5 | 1 | 40 |
avg_diff | 30885 | 11 | [1.055, 1.09) | 84 |
BFI_extra_5 | 30956 | 5 | 1 | 38 |
BFI_extra_5R | 30956 | 5 | 5 | 38 |
sexy_clothes | 30924 | 18 | 6 | 133 |
breakup | 26606 | 3 | 1 | 22 |
communication_partner_2 | 28857 | 8 | 6 | 58 |
BFI_extra_4 | 30956 | 5 | 1 | 36 |
BFI_extra_3 | 30956 | 5 | 1 | 34 |
BFI_neuro_4 | 30956 | 5 | 1 | 34 |
NARQ_rivalry_3 | 29876 | 7 | 6 | 46 |
BFI_agree_5 | 30956 | 5 | 1 | 32 |
SOI_R_8 | 30956 | 5 | 5 | 32 |
religiosity | 30956 | 5 | 5 | 31 |
RJS_4 | 30956 | 5 | 5 | 31 |
days_responded_percentage | 30885 | 11 | [0.95122,0.976) | 67 |
CJS_5 | 30956 | 5 | 5 | 30 |
NARQ_8 | 30956 | 5 | 5 | 30 |
RJS_2 | 30956 | 5 | 5 | 30 |
BFI_consc_5 | 30956 | 5 | 1 | 29 |
certainty_menstruation | 30956 | 5 | 1 | 29 |
cyklus_6 | 30956 | 5 | 2 | 29 |
SOI_R_9 | 30956 | 5 | 5 | 29 |
attractiveness_occupation | 30956 | 5 | 1 | 28 |
contraceptive_method | 30956 | 5 | other | 27 |
MV_P_4 | 30956 | 5 | 1 | 27 |
NARQ_admiration | 29877 | 17 | 6 | 88 |
SOI_uv | 30956 | 13 | 1.33333333333333 | 67 |
SJS | 30956 | 7 | 2 | 36 |
BFI_open_8 | 30956 | 5 | 1 | 25 |
sexual_intercourse_satisfaction | 6511 | 8 | 1 | 38 |
BFI_consc_6 | 30956 | 5 | 1 | 23 |
contraceptive_method_by_pearl | 30956 | 5 | fertility_awareness | 23 |
attractiveness_finance | 30956 | 9 | 5 | 41 |
CJS_4 | 30956 | 5 | 5 | 22 |
MV_1 | 30956 | 5 | 5 | 22 |
BFI_extra_1 | 30956 | 5 | 1 | 21 |
NARQ_4 | 30956 | 5 | 5 | 21 |
BFI_agree_9 | 30956 | 5 | 1 | 20 |
BFI_open_4 | 30956 | 5 | 1 | 20 |
NARQ_10 | 30956 | 5 | 5 | 19 |
ZIP_5 | 30956 | 5 | 1 | 19 |
BFI_agree_3 | 30956 | 5 | 1 | 18 |
BFI_agree_3R | 30956 | 5 | 5 | 18 |
BFI_consc_7 | 30956 | 5 | 1 | 18 |
no_variation | 29891 | 3 | TRUE | 10 |
BFI_consc_1 | 30956 | 5 | 1 | 16 |
MV_3 | 30956 | 5 | 1 | 15 |
NARQ_12 | 30956 | 5 | 5 | 15 |
NARQ_13 | 30956 | 5 | 5 | 15 |
ZIP_2 | 30956 | 5 | 1 | 15 |
ZIP_4 | 30956 | 5 | 1 | 14 |
ZIP_4R | 30956 | 5 | 5 | 14 |
new_relationship | 26606 | 3 | 1 | 8 |
BFI_agree_2 | 30956 | 5 | 1 | 13 |
MV_P_3 | 30956 | 5 | 1 | 13 |
satisfaction_sexual_intercourse | 29562 | 6 | 1 | 15 |
days_with_partner_per_month | 9538 | 6 | > 14 days | 14 |
duration_pill | 16212 | 6 | 1 | 14 |
long_distance_relationship_2 | 9538 | 6 | 5 | 14 |
BFI_consc_3 | 30956 | 5 | 1 | 11 |
ZIP_3 | 30956 | 5 | 1 | 11 |
BFI_agree_7 | 30956 | 5 | 1 | 10 |
occupation_clean | 30850 | 7 | Nicht berufstätig | 13 |
NARQ_2 | 30956 | 5 | 5 | 9 |
SOI_R_1 | 30956 | 5 | 5 | 9 |
sexy | 30956 | 5 | 1 | 8 |
attractiveness_face | 30956 | 5 | 1 | 7 |
attractiveness_overall | 30956 | 5 | 1 | 7 |
BFI_open_2 | 30956 | 5 | 1 | 7 |
BFI_open_3 | 30956 | 5 | 1 | 7 |
NARQ_17 | 30956 | 5 | 5 | 7 |
ZIP_1 | 30956 | 5 | 1 | 7 |
ZIP_6 | 30956 | 5 | 1 | 7 |
partner_mate_retention | 29874 | 22 | 6 | 30 |
number_of_cycles | 30924 | 6 | 5 | 8 |
RCD_squished | 18018 | 31 | 0 | 41 |
attractiveness_ltp | 30956 | 5 | 1 | 6 |
partner_attractiveness_longterm | 30956 | 5 | -3.97818492731748 | 6 |
cycle_nr | 30924 | 6 | 5 | 7 |
duration_relationship_months | 30956 | 13 | 12 | 15 |
attractiveness_ltp_self | 30956 | 5 | 1 | 5 |
contraceptives_broad_categories | 30952 | 11 | partner_sterilised | 11 |
distance_partner | 9538 | 8 | 6 | 8 |
distance_to_partner_hours | 9538 | 8 | 9-12h | 8 |
partner_attractiveness_money | 30956 | 13 | 1 | 12 |
attractiveness_body | 30956 | 5 | 1 | 4 |
SOI_im | 30956 | 13 | 5 | 9 |
religion_clean | 30881 | 6 | Judentum | 4 |
MV_short | 30956 | 13 | 1 | 8 |
NARQ_14 | 30956 | 5 | 5 | 3 |
gestagen_ug | 2 | 2 | 250 | 1 |
male_jealousy | 29876 | 17 | 5.33333333333333 | 8 |
NARQ_rivalry | 29876 | 17 | 5.66666666666667 | 8 |
method_meeting_clean | 16222 | 7 | Depotspritze | 3 |
extra_pair_flirting | 29873 | 17 | 5.66666666666667 | 7 |
MV | 30956 | 17 | 1.8 | 7 |
SOI_be | 30956 | 13 | 4.66666666666667 | 5 |
SGSE | 30956 | 21 | 4.8 | 8 |
contraceptives_categories_natural | 29911 | 6 | infertile | 2 |
estrogen | 195 | 3 | 30 | 1 |
estrogen_ug | 1497 | 6 | 15 | 2 |
estrogen_ug_merged | 2157 | 6 | 15 | 2 |
female_jealousy | 30924 | 18 | 5.66666666666667 | 6 |
gestagen | 195 | 3 | 13 | 1 |
hormonal_1 | 1716 | 6 | 4 | 2 |
extra_pair_desire | 29873 | 27 | 5.8 | 7 |
estrogen_ug_other | 1315 | 4 | 420 | 1 |
partner_attractiveness_shortterm | 30956 | 16 | -3.34512824374663 | 4 |
MV_P_short | 30956 | 13 | 1 | 3 |
partner_attractiveness_physical | 30956 | 9 | 2 | 2 |
sex_orientation | 30956 | 5 | pansexuell | 1 |
RJS | 30956 | 25 | 4.83333333333333 | 4 |
hormonal_2 | 1716 | 9 | circlet | 1 |
gestagens | 1497 | 10 | NGT | 1 |
gestagens_ug | 1497 | 10 | 125 | 1 |
estrogen_ug_all | 26785 | 11 | 360 | 1 |
estrogen_ug_cycle | 17120 | 12 | 360 | 1 |
gestagen_type | 16178 | 12 | NES | 1 |
gestagen_ug_merged | 15610 | 12 | 1500 | 1 |
menarche | 13225 | 12 | 19 | 1 |
in_pair_desire | 29874 | 56 | 4.4 | 4 |
occupation | 30956 | 14 | Beamtin | 1 |
BFI_neuro | 30956 | 32 | 1.125 | 2 |
gestagen_cycle | 14567 | 16 | 1.44 | 1 |
intensive_sports | 26606 | 16 | 14 | 1 |
SOI_R | 30956 | 35 | 4.22222222222222 | 2 |
MV_P | 30956 | 18 | 4.8 | 1 |
cigarettes | 26606 | 19 | 11 | 1 |
choice_of_clothing | 29881 | 41 | 5.5 | 2 |
cycle_length | 30956 | 21 | 39 | 1 |
cyklus_3 | 30956 | 21 | 39 | 1 |
day_count | 30956 | 42 | 42 | 2 |
days | 30956 | 42 | 25 | 2 |
fertile_days_known_backward | 30956 | 42 | 41 | 2 |
first_time | 29943 | 21 | 10 | 1 |
MV_diff | 30956 | 24 | -3.66666666666667 | 1 |
CJS | 30956 | 25 | 5 | 1 |
ZIP | 30956 | 27 | 1.28571428571429 | 1 |
partner_attractiveness_global | 30956 | 29 | -2.95278253338198 | 1 |
female_mate_retention | 30924 | 30 | 3.33333333333333 | 1 |
BFI_extra | 30956 | 31 | 1.125 | 1 |
BFI_agree | 30956 | 32 | 1.33333333333333 | 1 |
NARQ_K_total | 29876 | 32 | 5.83333333333333 | 1 |
BFI_open | 30956 | 33 | 1.5 | 1 |
BFI_consc | 30956 | 34 | 1.22222222222222 | 1 |
age | 30956 | 42 | 54 | 1 |
fertile_days_known_backward_inferred | 30956 | 43 | 42 | 1 |
fertile_days_known_forward | 30956 | 43 | 42 | 1 |
n_days | 30956 | 43 | 42 | 1 |
height | 30956 | 46 | 116 | 1 |
number_sexual_partner | 30956 | 47 | 100 | 1 |
NARQ | 30956 | 56 | 1 | 1 |
extra_pair | 29873 | 62 | 5.75 | 1 |
partner_attractiveness_rel_to_self | 30956 | 65 | -0.162454954632523 | 1 |
weight_post | 26606 | 68 | 102 | 1 |
contraception | 30952 | 73 | coitus_interruptus, temperature_billings | 1 |
weight | 30956 | 79 | 101 | 1 |
last_lag | 27362 | 95 | 107 | 1 |
cycle_length_diary | 18018 | 111 | 101 | 1 |
median_cycle_length_diary | 26728 | 127 | 10 | 1 |
estrogen_ug_per_kg | 17120 | 143 | 10.2083333333333 | 1 |
attractiveness_income_age_corrected | 30956 | 148 | -0.049531448419045 | 1 |
mean_cycle_length_diary | 30924 | 155 | 10 | 1 |
timespan | 30924 | 181 | 101 | 1 |
took_days | 30924 | 181 | 101 | 1 |
minimum_cycle_length_diary | 30924 | 197 | 109 | 1 |
duration_relationship_total | 30820 | 206 | 110 | 1 |
duration_relationship_years | 30820 | 206 | 10.25 | 1 |
menstrual_onset_days_until | 18614 | 219 | -105 | 1 |
RCD | 18614 | 219 | -105 | 1 |
RCD_for_merge | 18614 | 219 | 106 | 1 |
RCD_rel_to_ovulation | 18614 | 219 | -101 | 1 |
FCD | 30924 | 270 | 123 | 1 |
menstrual_onset_days_since | 30924 | 270 | 122 | 1 |
day_number | 30924 | 314 | 159 | 1 |
RCD_inferred | 30924 | 446 | -106 | 1 |
BMI | 26606 | 568 | 10.8108108108108 | 1 |
fertile_mean | 30956 | 888 | 0.0111111111111111 | 1 |
# drop
diary = diary %>%
# durations redundant with total duration, use age group, sex orientation has overly rare categories
select(-duration_relationship_months,-duration_relationship_years, -age, -sex_orientation
)
## round/group
diary = diary %>% mutate(
# height = round_any(height, 5), # rounding would be nice but leaves extreme values out there
# weight = round_any(weight, 5),
# BMI = round_any(BMI, 2),
duration_relationship_total = round_any(duration_relationship_total/12, 0.3)
# first_time = round_any(first_time, 3)
) %>%
select(
-estrogen_ug_per_kg # derived factors from weight can allow people to recompute weight
)
## cap
na_if_lt = function(x, lt) {
x = as.numeric(x)
if_else(x < lt, NA_real_, x)
}
na_if_gt = function(x, gt) {
x = as.numeric(x)
if_else(x > gt, NA_real_, x)
}
diary = diary %>% mutate(
RCD = na_if_lt(RCD, -40),
RCD_rel_to_ovulation = na_if_lt(RCD_rel_to_ovulation, -20),
RCD_inferred = na_if_gt(na_if_lt(RCD_inferred, -40), 30),
RCD_for_merge = na_if_gt(RCD_for_merge, 50),
FCD = na_if_gt(FCD, 40),
day_number = na_if_gt(day_number, 40),
menstrual_onset_days_since = na_if_gt(menstrual_onset_days_since, 40),
menstrual_onset_days_until = na_if_lt(menstrual_onset_days_until, -40),
minimum_cycle_length_diary = na_if_gt(minimum_cycle_length_diary, 40),
took_days = na_if_gt(took_days, 40),
timespan = na_if_gt(timespan, 40),
mean_cycle_length_diary = na_if_gt(mean_cycle_length_diary, 40),
median_cycle_length_diary = na_if_gt(mean_cycle_length_diary, 40),
cycle_length_diary = na_if_gt(mean_cycle_length_diary, 40),
last_lag = na_if_gt(last_lag, 40),
number_sexual_partner = na_if_gt(number_sexual_partner, 30)
)
diary %>% gather(variable, value, -person) %>%
group_by(variable) %>%
summarise(n = n_nonmissing(value), n_dist = n_distinct(value), lc_val = commonness_rarest_value(value, person, T), lc_freq = commonness_rarest_value(value, person, F)) %>%
arrange(desc(lc_freq/(n_dist+lc_freq))) %>%
pander()
variable | n | n_dist | lc_val | lc_freq |
---|---|---|---|---|
ended.vorab | 30956 | 1 | TRUE | 1208 |
gender | 30956 | 1 | 1 | 1208 |
hetero_relationship | 30956 | 1 | 1 | 1208 |
weekend | 30956 | 2 | TRUE | 1122 |
time_since_last_response | 23275 | 2 | [10,31] | 1097 |
menstruated_at_all | 30956 | 2 | TRUE | 910 |
in_pair_public_intimacy | 29902 | 3 | 0 | 1081 |
mate_retention_1 | 29902 | 3 | 2 | 960 |
spent_night_with_partner | 29902 | 3 | 0 | 960 |
fertile_broad_backward_inferred | 17013 | 3 | 0.0533333333333333 | 959 |
had_sexual_intercourse | 29902 | 3 | 1 | 945 |
premenstrual_phase_backward_inferred | 27143 | 3 | TRUE | 929 |
premenstrual_phase_fab | 27143 | 3 | TRUE | 929 |
fertile_broad_forward_counted | 16201 | 3 | 0.0533333333333333 | 928 |
fertile_window_forward_counted | 16201 | 3 | broad | 919 |
fertile_window_backward_inferred | 17013 | 3 | broad | 915 |
menstruation_1 | 29901 | 3 | 1 | 910 |
fertile_narrow_forward_counted | 12407 | 3 | 0.51 | 908 |
fertile_narrow_backward_inferred | 13345 | 3 | 0.51 | 905 |
had_petting | 29901 | 3 | 1 | 904 |
pill_contraception | 30956 | 2 | 0 | 598 |
premenstrual_phase_forward_counted | 27380 | 3 | TRUE | 873 |
contraception_meeting_partner | 30956 | 2 | 2 | 581 |
pill_in_last_3_months | 30956 | 2 | 0 | 566 |
pille_control | 30956 | 2 | 2 | 566 |
menstruation | 30956 | 3 | pre | 819 |
premenstrual_phase | 17887 | 3 | TRUE | 819 |
premenstrual_phase_squished | 17944 | 3 | TRUE | 807 |
sufficient_diary_coverage | 30956 | 2 | FALSE | 518 |
partner_initiated_sexual_intercourse | 6512 | 3 | 0 | 772 |
sexual_intercourse_3 | 6512 | 3 | 1 | 772 |
hormonal_contraception | 30956 | 2 | 0 | 514 |
menstrual_onset | 18614 | 3 | TRUE | 726 |
fertile_broad_squished | 11877 | 3 | 0.44 | 697 |
fertile_broad | 11682 | 3 | 0.44 | 694 |
fertile_window_squished | 11877 | 3 | broad | 683 |
fertile_window | 11682 | 3 | broad | 675 |
fertile_narrow | 9648 | 3 | 0.51 | 670 |
fertile_narrow_squished | 9702 | 3 | 0.51 | 667 |
ended.abschluss | 30956 | 2 | FALSE | 439 |
hormonal_contra | 30952 | 3 | TRUE | 602 |
hormonal_all | 30952 | 3 | FALSE | 585 |
mate_retention_2 | 29902 | 4 | 1 | 758 |
dodgy_data | 30924 | 3 | TRUE | 566 |
any_RCD | 30956 | 2 | FALSE | 368 |
SJS_5 | 30956 | 2 | 2 | 357 |
SJS_5R | 30956 | 2 | 1 | 357 |
hormonal | 30952 | 3 | FALSE | 514 |
SJS_6 | 30956 | 2 | 2 | 312 |
ever_menstruated | 30956 | 2 | FALSE | 298 |
had_any_period | 30956 | 2 | FALSE | 298 |
sexual_intercourse_2 | 29902 | 4 | 2 | 595 |
SJS_4 | 30956 | 2 | 2 | 286 |
sexual_intercourse_5 | 29901 | 4 | 2 | 567 |
include_all | 30952 | 3 | TRUE | 421 |
included | 28493 | 3 | cycling | 421 |
included_all | 28493 | 3 | cycling | 421 |
antibiotics | 30956 | 2 | 1 | 279 |
hormonal_medication_in_last_3_months | 30956 | 2 | 1 | 279 |
weekday | 30924 | 8 | Saturday | 1034 |
hormonal_lax | 30952 | 3 | TRUE | 374 |
week_number | 30924 | 7 | (28,35] | 833 |
stress | 26606 | 3 | 1 | 355 |
days_with_partner | 30956 | 3 | 3-5 days | 346 |
cohabitation | 30956 | 3 | Live in same city | 334 |
long_distance_relationship | 30956 | 3 | 2 | 334 |
sexual_intercourse_1 | 29902 | 6 | 1 | 663 |
sexual_intercourse_1_6scale | 29902 | 6 | 1.2 | 663 |
medicament_1 | 26606 | 3 | 1 | 330 |
nights_with_partner | 30956 | 3 | 3-5 nights | 329 |
attention_2 | 29874 | 7 | 6 | 766 |
SJS_3 | 30956 | 2 | 2 | 213 |
SJS_3R | 30956 | 2 | 1 | 213 |
male_attention_1 | 29874 | 7 | 2 | 739 |
situation_of_living | 18107 | 3 | 1 | 294 |
hormonal_conservative | 30940 | 3 | TRUE | 293 |
menstruation_3 | 4794 | 4 | 3 | 390 |
menstruation_strength | 30956 | 4 | 3 | 390 |
days_since_menstrual_onset | 4068 | 6 | 4 | 571 |
SJS_1 | 30956 | 2 | 2 | 188 |
illness_1 | 26606 | 3 | 1 | 271 |
children_broad_categories | 30956 | 2 | children | 180 |
mate_retention_3 | 29876 | 7 | 6 | 594 |
desirability_1 | 29881 | 7 | 1 | 562 |
choice_of_clothing_3 | 29887 | 7 | 1 | 554 |
communication_partner_1 | 29902 | 4 | 1 | 311 |
choice_of_clothing_6 | 29883 | 7 | 6 | 541 |
desirability_partner | 29879 | 7 | 1 | 537 |
time_of_response | 30924 | 11 | [17.0,17.3) | 835 |
any_fertile_days_known | 30956 | 2 | FALSE | 147 |
attention_1 | 29874 | 7 | 2 | 510 |
jealousy_1 | 29877 | 7 | 6 | 463 |
extra_pair_3 | 29873 | 7 | 6 | 451 |
cycle_regularity | 30956 | 3 | irregular, more than 5 days off | 192 |
cyklus_5 | 30956 | 3 | 3 | 192 |
extra_pair_6 | 29873 | 7 | 2 | 445 |
male_mate_retention_1 | 29876 | 7 | 6 | 444 |
choice_of_clothing_1 | 29896 | 7 | 6 | 431 |
mate_retention_4 | 29876 | 7 | 2 | 427 |
male_mate_retention_2 | 29876 | 7 | 2 | 418 |
mate_retention_6 | 29875 | 7 | 6 | 417 |
flat_share | 10737 | 3 | 1 | 173 |
extra_pair_12 | 29873 | 7 | 6 | 389 |
hormonal_strict | 30940 | 3 | TRUE | 161 |
SJS_2 | 30956 | 2 | 2 | 107 |
SJS_2R | 30956 | 2 | 1 | 107 |
menstruation_2 | 4794 | 7 | 6 | 368 |
extra_pair_5 | 29873 | 7 | 2 | 348 |
choice_of_clothing_4 | 29884 | 7 | 6 | 343 |
time_for_response | 29143 | 11 | [0.10, 2.15) | 531 |
extra_pair_2 | 29874 | 7 | 6 | 336 |
we_know_fertile_days | 30956 | 2 | FALSE | 96 |
include_lax | 30952 | 3 | TRUE | 143 |
included_lax | 17960 | 3 | cycling | 143 |
self_esteem_1 | 29881 | 7 | 1 | 309 |
BFI_consc_9 | 30956 | 5 | 3 | 219 |
BFI_consc_9R | 30956 | 5 | 3 | 219 |
living_situation | 30956 | 4 | living in all-female flatshare | 173 |
choice_of_clothing_8 | 29882 | 7 | 6 | 291 |
showy_clothes | 29882 | 7 | 6 | 291 |
extra_pair_1 | 29867 | 3 | 1 | 124 |
extra_pair_intimacy | 29867 | 3 | 1 | 124 |
NARQ_admiration_3 | 29877 | 7 | 6 | 289 |
estrogen_categories | 26785 | 4 | (300,600] | 153 |
mate_retention_5 | 29876 | 7 | 6 | 265 |
include_conservative | 30952 | 3 | TRUE | 112 |
included_conservative | 14147 | 3 | cycling | 112 |
SOI_R_6 | 30956 | 5 | 2 | 183 |
SOI_R_6R | 30956 | 5 | 4 | 183 |
prc_stirn_b_forward_counted | 27380 | 20 | 0.14 | 687 |
fertile_fab | 27143 | 20 | 0.05 | 679 |
fertile_forward_and_backward | 27143 | 20 | 0.05 | 679 |
prc_stirn_b_backward_inferred | 27143 | 20 | 0.05 | 679 |
SOI_R_5 | 30956 | 5 | 3 | 169 |
extra_pair_13 | 29873 | 7 | 6 | 234 |
extra_pair_sexual_fantasies | 29873 | 7 | 6 | 234 |
choice_of_clothing_2 | 29892 | 7 | 1 | 233 |
MV_2 | 30956 | 5 | 1 | 162 |
attention | 29874 | 12 | 1.5 | 382 |
choice_of_clothing_7 | 29881 | 7 | 6 | 222 |
fertile | 27828 | 20 | 0.16 | 619 |
attractiveness_finance_2 | 30956 | 5 | 5 | 152 |
income_partner | 30956 | 5 | > 3000€ | 152 |
NARQ_admiration_2 | 29877 | 7 | 6 | 211 |
SOI_R_4 | 30956 | 5 | 1 | 148 |
NARQ_admiration_1 | 29877 | 7 | 6 | 197 |
BFI_open_10 | 30956 | 5 | 1 | 136 |
choice_of_clothing_5 | 29885 | 7 | 6 | 186 |
extra_pair_7 | 29873 | 7 | 6 | 186 |
had_sex_with_partner_yet | 30956 | 2 | 0 | 53 |
male_mate_retention | 29876 | 12 | 6 | 314 |
relationship_satisfaction_1 | 29902 | 8 | 4.5 | 208 |
extra_pair_9 | 29873 | 7 | 6 | 179 |
ended.nachbe_other_hormonal | 30956 | 2 | TRUE | 51 |
extra_pair_10 | 29873 | 7 | 6 | 178 |
BFI_agree_6 | 30956 | 5 | 1 | 126 |
BFI_agree_6R | 30956 | 5 | 5 | 126 |
BFI_extra_7 | 30956 | 5 | 5 | 126 |
BFI_extra_7R | 30956 | 5 | 1 | 126 |
trying_to_get_pregnant | 30956 | 2 | 1 | 50 |
prc_wcx_b_forward_counted | 27380 | 26 | 0.006 | 624 |
CJS_6 | 30956 | 5 | 5 | 119 |
prc_wcx_b_backward_inferred | 27143 | 26 | 0.001 | 603 |
BFI_agree_4 | 30956 | 5 | 1 | 113 |
NARQ_1 | 30956 | 5 | 5 | 112 |
BFI_consc_4 | 30956 | 5 | 1 | 111 |
BFI_consc_4R | 30956 | 5 | 5 | 111 |
included_levels | 28493 | 5 | lax | 111 |
SGSE_5 | 30956 | 5 | 5 | 111 |
BFI_neuro_7 | 30956 | 5 | 5 | 108 |
RJS_1 | 30956 | 5 | 5 | 108 |
CJS_3 | 30956 | 5 | 1 | 106 |
MV_P_2 | 30956 | 5 | 1 | 105 |
SGSE_4 | 30956 | 5 | 5 | 104 |
SGSE_4R | 30956 | 5 | 1 | 104 |
extra_pair_going_out | 29873 | 12 | 5.5 | 242 |
male_jealousy_1 | 29876 | 7 | 6 | 141 |
male_jealousy_2 | 29877 | 7 | 6 | 138 |
BFI_neuro_8 | 30956 | 5 | 1 | 98 |
NARQ_6 | 30956 | 5 | 5 | 98 |
SOI_R_3 | 30956 | 5 | 5 | 97 |
cyklus_4 | 30956 | 5 | 1 | 95 |
include_strict | 30952 | 3 | TRUE | 57 |
included_bool | 7682 | 3 | TRUE | 57 |
included_strict | 7682 | 3 | cycling | 57 |
extra_pair_1b | 399 | 3 | 1 | 54 |
extra_pair_sex | 30924 | 3 | 1 | 54 |
fertile_cont | 17887 | 20 | 0.05 | 360 |
prc_stirn_b | 17887 | 20 | 0.05 | 360 |
extra_pair_8 | 29873 | 7 | 6 | 125 |
RJS_3 | 30956 | 5 | 5 | 89 |
BFI_neuro_6 | 30956 | 5 | 1 | 87 |
BFI_neuro_6R | 30956 | 5 | 5 | 87 |
relationship_status_clean | 30956 | 3 | Verlobt | 52 |
BFI_agree_1 | 30956 | 5 | 5 | 86 |
BFI_agree_1R | 30956 | 5 | 1 | 86 |
BFI_consc_8 | 30956 | 5 | 1 | 85 |
BFI_consc_8R | 30956 | 5 | 5 | 85 |
BFI_neuro_5 | 30956 | 5 | 5 | 85 |
BFI_neuro_5R | 30956 | 5 | 1 | 85 |
BFI_extra_2 | 30956 | 5 | 1 | 84 |
BFI_extra_2R | 30956 | 5 | 5 | 84 |
MV_P_5 | 30956 | 5 | 1 | 84 |
MV_P_5R | 30956 | 5 | 5 | 84 |
extra_pair_4 | 29873 | 7 | 6 | 116 |
BFI_agree_8 | 30956 | 5 | 1 | 82 |
BFI_agree_8R | 30956 | 5 | 5 | 82 |
BFI_neuro_3 | 30956 | 5 | 1 | 81 |
SGSE_1 | 30956 | 5 | 5 | 81 |
ended | 30956 | 2 | FALSE | 32 |
attractiveness_stp_self | 30956 | 5 | 1 | 79 |
CJS_2 | 30956 | 5 | 5 | 79 |
attractiveness_finance_1 | 30956 | 5 | 5 | 78 |
extra_pair_compliments | 29873 | 12 | 6 | 187 |
male_jealousy_3 | 29876 | 7 | 6 | 109 |
SGSE_3 | 30956 | 5 | 5 | 76 |
SGSE_3R | 30956 | 5 | 1 | 76 |
attractiveness_stp | 30956 | 5 | 1 | 75 |
NARQ_7 | 30956 | 5 | 1 | 75 |
extra_pair_11 | 29873 | 7 | 6 | 102 |
NARQ_rivalry_1 | 29876 | 7 | 6 | 100 |
MV_5 | 30956 | 5 | 1 | 71 |
MV_5R | 30956 | 5 | 5 | 71 |
NARQ_3 | 30956 | 5 | 5 | 71 |
SOI_R_7 | 30956 | 5 | 5 | 70 |
NARQ_rivalry_2 | 29877 | 7 | 6 | 97 |
BFI_neuro_2 | 30956 | 5 | 1 | 69 |
BFI_neuro_2R | 30956 | 5 | 5 | 69 |
prc_stirn_b_squished | 17944 | 20 | 0.16 | 271 |
has_not_had_sex_yet | 30956 | 2 | 1 | 27 |
BFI_neuro_1 | 30956 | 5 | 5 | 67 |
cyklus_2 | 30956 | 5 | 2 | 66 |
pregnant | 30956 | 2 | 1 | 26 |
pregnant_in_last_3_months | 30956 | 2 | yes | 26 |
NARQ_15 | 30956 | 5 | 5 | 64 |
day_number | 28729 | 42 | 35 | 534 |
NARQ_16 | 30956 | 5 | 5 | 62 |
cycle_length_groups | 30956 | 4 | (35,41] | 49 |
BFI_extra_8 | 30956 | 5 | 1 | 61 |
contraceptive_methods | 30357 | 5 | fertility_awareness | 61 |
SOI_R_2 | 30956 | 5 | 5 | 60 |
RJS_5 | 30956 | 5 | 5 | 59 |
BFI_open_7 | 30956 | 5 | 1 | 58 |
BFI_open_7R | 30956 | 5 | 5 | 58 |
RJS_6 | 30956 | 5 | 5 | 58 |
BFI_open_5 | 30956 | 5 | 1 | 56 |
ZIP_7 | 30956 | 5 | 1 | 56 |
ZIP_7R | 30956 | 5 | 5 | 56 |
NARQ_11 | 30956 | 5 | 1 | 55 |
want_more_info | 26606 | 3 | 0 | 33 |
prc_wcx_b | 17887 | 26 | 0 | 277 |
prc_wcx_b_squished | 17944 | 26 | 0.018 | 271 |
change_contraception_1 | 26606 | 3 | 1 | 31 |
BFI_open_9 | 30956 | 5 | 1 | 51 |
BFI_open_9R | 30956 | 5 | 5 | 51 |
CJS_1 | 30956 | 5 | 5 | 51 |
attractiveness_relativ_1 | 30956 | 4 | 4 | 40 |
breast_feeding | 30956 | 2 | 1 | 20 |
breast_feeding_in_last_3_months | 30956 | 2 | 1 | 20 |
SGSE_2 | 30956 | 5 | 5 | 49 |
BFI_open_1 | 30956 | 5 | 1 | 47 |
age_group | 30956 | 6 | (45,70] | 56 |
BFI_extra_6 | 30956 | 5 | 1 | 45 |
BFI_open_6 | 30956 | 5 | 1 | 44 |
biggest_diff | 30956 | 10 | [ 2.04, 2.13) | 88 |
NARQ_18 | 30956 | 5 | 1 | 44 |
NARQ_9 | 30956 | 5 | 5 | 44 |
BFI_consc_2 | 30956 | 5 | 1 | 43 |
BFI_consc_2R | 30956 | 5 | 5 | 43 |
NARQ_5 | 30956 | 5 | 1 | 43 |
MV_P_1 | 30956 | 5 | 5 | 42 |
attractiveness_finance_self | 13229 | 6 | 5 | 49 |
income | 13229 | 6 | > 3000€ | 49 |
MV_4 | 30956 | 5 | 1 | 40 |
avg_diff | 30885 | 11 | [1.055, 1.09) | 84 |
BFI_extra_5 | 30956 | 5 | 1 | 38 |
BFI_extra_5R | 30956 | 5 | 5 | 38 |
sexy_clothes | 30924 | 18 | 6 | 133 |
breakup | 26606 | 3 | 1 | 22 |
communication_partner_2 | 28857 | 8 | 6 | 58 |
BFI_extra_4 | 30956 | 5 | 1 | 36 |
BFI_extra_3 | 30956 | 5 | 1 | 34 |
BFI_neuro_4 | 30956 | 5 | 1 | 34 |
NARQ_rivalry_3 | 29876 | 7 | 6 | 46 |
BFI_agree_5 | 30956 | 5 | 1 | 32 |
SOI_R_8 | 30956 | 5 | 5 | 32 |
religiosity | 30956 | 5 | 5 | 31 |
RJS_4 | 30956 | 5 | 5 | 31 |
days_responded_percentage | 30885 | 11 | [0.95122,0.976) | 67 |
CJS_5 | 30956 | 5 | 5 | 30 |
NARQ_8 | 30956 | 5 | 5 | 30 |
RJS_2 | 30956 | 5 | 5 | 30 |
BFI_consc_5 | 30956 | 5 | 1 | 29 |
certainty_menstruation | 30956 | 5 | 1 | 29 |
cyklus_6 | 30956 | 5 | 2 | 29 |
SOI_R_9 | 30956 | 5 | 5 | 29 |
attractiveness_occupation | 30956 | 5 | 1 | 28 |
contraceptive_method | 30956 | 5 | other | 27 |
MV_P_4 | 30956 | 5 | 1 | 27 |
NARQ_admiration | 29877 | 17 | 6 | 88 |
SOI_uv | 30956 | 13 | 1.33333333333333 | 67 |
SJS | 30956 | 7 | 2 | 36 |
BFI_open_8 | 30956 | 5 | 1 | 25 |
sexual_intercourse_satisfaction | 6511 | 8 | 1 | 38 |
BFI_consc_6 | 30956 | 5 | 1 | 23 |
contraceptive_method_by_pearl | 30956 | 5 | fertility_awareness | 23 |
attractiveness_finance | 30956 | 9 | 5 | 41 |
CJS_4 | 30956 | 5 | 5 | 22 |
MV_1 | 30956 | 5 | 5 | 22 |
BFI_extra_1 | 30956 | 5 | 1 | 21 |
NARQ_4 | 30956 | 5 | 5 | 21 |
BFI_agree_9 | 30956 | 5 | 1 | 20 |
BFI_open_4 | 30956 | 5 | 1 | 20 |
NARQ_10 | 30956 | 5 | 5 | 19 |
ZIP_5 | 30956 | 5 | 1 | 19 |
BFI_agree_3 | 30956 | 5 | 1 | 18 |
BFI_agree_3R | 30956 | 5 | 5 | 18 |
BFI_consc_7 | 30956 | 5 | 1 | 18 |
no_variation | 29891 | 3 | TRUE | 10 |
BFI_consc_1 | 30956 | 5 | 1 | 16 |
MV_3 | 30956 | 5 | 1 | 15 |
NARQ_12 | 30956 | 5 | 5 | 15 |
NARQ_13 | 30956 | 5 | 5 | 15 |
ZIP_2 | 30956 | 5 | 1 | 15 |
ZIP_4 | 30956 | 5 | 1 | 14 |
ZIP_4R | 30956 | 5 | 5 | 14 |
new_relationship | 26606 | 3 | 1 | 8 |
BFI_agree_2 | 30956 | 5 | 1 | 13 |
MV_P_3 | 30956 | 5 | 1 | 13 |
satisfaction_sexual_intercourse | 29562 | 6 | 1 | 15 |
days_with_partner_per_month | 9538 | 6 | > 14 days | 14 |
duration_pill | 16212 | 6 | 1 | 14 |
long_distance_relationship_2 | 9538 | 6 | 5 | 14 |
BFI_consc_3 | 30956 | 5 | 1 | 11 |
ZIP_3 | 30956 | 5 | 1 | 11 |
FCD | 27288 | 41 | 40 | 89 |
menstrual_onset_days_since | 27380 | 42 | 39 | 89 |
BFI_agree_7 | 30956 | 5 | 1 | 10 |
occupation_clean | 30850 | 7 | Nicht berufstätig | 13 |
NARQ_2 | 30956 | 5 | 5 | 9 |
SOI_R_1 | 30956 | 5 | 5 | 9 |
sexy | 30956 | 5 | 1 | 8 |
attractiveness_face | 30956 | 5 | 1 | 7 |
attractiveness_overall | 30956 | 5 | 1 | 7 |
BFI_open_2 | 30956 | 5 | 1 | 7 |
BFI_open_3 | 30956 | 5 | 1 | 7 |
NARQ_17 | 30956 | 5 | 5 | 7 |
ZIP_1 | 30956 | 5 | 1 | 7 |
ZIP_6 | 30956 | 5 | 1 | 7 |
partner_mate_retention | 29874 | 22 | 6 | 30 |
number_of_cycles | 30924 | 6 | 5 | 8 |
RCD_squished | 18018 | 31 | 0 | 41 |
attractiveness_ltp | 30956 | 5 | 1 | 6 |
partner_attractiveness_longterm | 30956 | 5 | -3.97818492731748 | 6 |
RCD_rel_to_ovulation | 17731 | 37 | -20 | 44 |
cycle_nr | 30924 | 6 | 5 | 7 |
attractiveness_ltp_self | 30956 | 5 | 1 | 5 |
contraceptives_broad_categories | 30952 | 11 | partner_sterilised | 11 |
distance_partner | 9538 | 8 | 6 | 8 |
distance_to_partner_hours | 9538 | 8 | 9-12h | 8 |
partner_attractiveness_money | 30956 | 13 | 1 | 12 |
attractiveness_body | 30956 | 5 | 1 | 4 |
SOI_im | 30956 | 13 | 5 | 9 |
religion_clean | 30881 | 6 | Judentum | 4 |
MV_short | 30956 | 13 | 1 | 8 |
NARQ_14 | 30956 | 5 | 5 | 3 |
menstrual_onset_days_until | 17887 | 42 | -38 | 24 |
RCD | 17887 | 42 | -38 | 24 |
gestagen_ug | 2 | 2 | 250 | 1 |
male_jealousy | 29876 | 17 | 5.33333333333333 | 8 |
NARQ_rivalry | 29876 | 17 | 5.66666666666667 | 8 |
method_meeting_clean | 16222 | 7 | Depotspritze | 3 |
extra_pair_flirting | 29873 | 17 | 5.66666666666667 | 7 |
MV | 30956 | 17 | 1.8 | 7 |
SOI_be | 30956 | 13 | 4.66666666666667 | 5 |
SGSE | 30956 | 21 | 4.8 | 8 |
contraceptives_categories_natural | 29911 | 6 | infertile | 2 |
estrogen | 195 | 3 | 30 | 1 |
estrogen_ug | 1497 | 6 | 15 | 2 |
estrogen_ug_merged | 2157 | 6 | 15 | 2 |
female_jealousy | 30924 | 18 | 5.66666666666667 | 6 |
gestagen | 195 | 3 | 13 | 1 |
hormonal_1 | 1716 | 6 | 4 | 2 |
RCD_inferred | 28104 | 72 | -27 | 22 |
RCD_for_merge | 18097 | 51 | 49 | 14 |
extra_pair_desire | 29873 | 27 | 5.8 | 7 |
estrogen_ug_other | 1315 | 4 | 420 | 1 |
partner_attractiveness_shortterm | 30956 | 16 | -3.34512824374663 | 4 |
MV_P_short | 30956 | 13 | 1 | 3 |
partner_attractiveness_physical | 30956 | 9 | 2 | 2 |
minimum_cycle_length_diary | 23767 | 41 | 40 | 8 |
RJS | 30956 | 25 | 4.83333333333333 | 4 |
hormonal_2 | 1716 | 9 | circlet | 1 |
gestagens | 1497 | 10 | NGT | 1 |
gestagens_ug | 1497 | 10 | 125 | 1 |
estrogen_ug_all | 26785 | 11 | 360 | 1 |
estrogen_ug_cycle | 17120 | 12 | 360 | 1 |
gestagen_type | 16178 | 12 | NES | 1 |
gestagen_ug_merged | 15610 | 12 | 1500 | 1 |
menarche | 13225 | 12 | 19 | 1 |
in_pair_desire | 29874 | 56 | 4.4 | 4 |
occupation | 30956 | 14 | Beamtin | 1 |
BFI_neuro | 30956 | 32 | 1.125 | 2 |
gestagen_cycle | 14567 | 16 | 1.44 | 1 |
intensive_sports | 26606 | 16 | 14 | 1 |
SOI_R | 30956 | 35 | 4.22222222222222 | 2 |
MV_P | 30956 | 18 | 4.8 | 1 |
cigarettes | 26606 | 19 | 11 | 1 |
choice_of_clothing | 29881 | 41 | 5.5 | 2 |
cycle_length | 30956 | 21 | 39 | 1 |
cyklus_3 | 30956 | 21 | 39 | 1 |
day_count | 30956 | 42 | 42 | 2 |
days | 30956 | 42 | 25 | 2 |
fertile_days_known_backward | 30956 | 42 | 41 | 2 |
first_time | 29943 | 21 | 10 | 1 |
MV_diff | 30956 | 24 | -3.66666666666667 | 1 |
CJS | 30956 | 25 | 5 | 1 |
ZIP | 30956 | 27 | 1.28571428571429 | 1 |
number_sexual_partner | 30232 | 28 | 26 | 1 |
partner_attractiveness_global | 30956 | 29 | -2.95278253338198 | 1 |
female_mate_retention | 30924 | 30 | 3.33333333333333 | 1 |
BFI_extra | 30956 | 31 | 1.125 | 1 |
BFI_agree | 30956 | 32 | 1.33333333333333 | 1 |
NARQ_K_total | 29876 | 32 | 5.83333333333333 | 1 |
BFI_open | 30956 | 33 | 1.5 | 1 |
BFI_consc | 30956 | 34 | 1.22222222222222 | 1 |
last_lag | 25783 | 42 | 27 | 1 |
timespan | 18527 | 42 | 26 | 1 |
took_days | 18602 | 42 | 26 | 1 |
fertile_days_known_backward_inferred | 30956 | 43 | 42 | 1 |
fertile_days_known_forward | 30956 | 43 | 42 | 1 |
n_days | 30956 | 43 | 42 | 1 |
height | 30956 | 46 | 116 | 1 |
NARQ | 30956 | 56 | 1 | 1 |
extra_pair | 29873 | 62 | 5.75 | 1 |
partner_attractiveness_rel_to_self | 30956 | 65 | -0.162454954632523 | 1 |
weight_post | 26606 | 68 | 102 | 1 |
cycle_length_diary | 22767 | 69 | 10 | 1 |
mean_cycle_length_diary | 22767 | 69 | 10 | 1 |
median_cycle_length_diary | 22767 | 69 | 10 | 1 |
contraception | 30952 | 73 | coitus_interruptus, temperature_billings | 1 |
weight | 30956 | 79 | 101 | 1 |
duration_relationship_total | 30820 | 85 | 11.1 | 1 |
attractiveness_income_age_corrected | 30956 | 148 | -0.049531448419045 | 1 |
BMI | 26606 | 568 | 10.8108108108108 | 1 |
fertile_mean | 30956 | 888 | 0.0111111111111111 | 1 |
Ideally, no participant should be uniquely identified, but this is not realistic. Instead, I look for unique combinations of demographic variables.
cut_to_number = function(x, group = NULL, m = 40) {
if(!is.null(group)) {
df = data.frame(x = x, group = group) %>% unique()
} else {
df = data.frame(x = x)
}
cuts = Hmisc::cut2(df$x, m = m, onlycuts = T) # cut on person level
as.numeric(as.character(Hmisc::cut2(x, cuts = cuts, levels.mean = T)))
}
unique_combos = diary %>%
group_by(age_group, height, weight, religion_clean, duration_relationship_total) %>%
summarise(n = n()) %>%
ungroup() %>%
arrange(desc(n)) %>%
filter(n < 7)
nrow(unique_combos)
## [1] 198
diary = diary %>%
mutate(
religion_clean = recode(religion_clean, "Judentum" = "other", "Islam" = "other", "Buddhismus" = "other", .missing = "other"),
weight = cut_to_number(weight,person, m = 50),
height = cut_to_number(height,person, m = 50),
first_time = cut_to_number(first_time, person),
duration_relationship_total = cut_to_number(duration_relationship_total, person, m = 50)
)
unique_combos = diary %>%
group_by(age_group, height, weight, religion_clean, duration_relationship_total) %>%
summarise(n = n()) %>%
ungroup() %>%
arrange(desc(n)) %>%
filter(n < 7)
nrow(unique_combos)
## [1] 186
# unique_combos %>%
# pander()
diary = diary %>% left_join(unique_combos %>% select(-n) %>% mutate(too_unique = T))
diary = diary %>% mutate(
too_unique = if_else(is.na(too_unique), F, too_unique),
age_group = if_else(!too_unique, age_group, NA_integer_),
height = if_else(too_unique, NA_real_, height),
weight = if_else(too_unique, NA_real_, weight),
religion_clean = if_else(!too_unique, religion_clean, NA_character_),
duration_relationship_total = if_else(too_unique, NA_real_, duration_relationship_total)
)
diary_noep = diary %>% select(-starts_with("SOI")) %>% select(-starts_with("extra_pair")) %>% select(-number_sexual_partner, -trying_to_get_pregnant, -has_not_had_sex_yet, -had_sex_with_partner_yet, -first_time)
save(diary_noep, file = "diary_restricted_anonymised.rdata")
haven::write_sav(diary_noep, path = "diary_restricted_anonymised.sav")
## Error in if (any(bad_lengths)) {: missing value where TRUE/FALSE needed
save(diary, file = "diary_anonymised.rdata")
haven::write_sav(diary, path = "diary_anonymised.sav")
## Error in if (any(bad_lengths)) {: missing value where TRUE/FALSE needed