# knitr provides opts_chunk, used to set the chunk defaults below.
library(knitr)
# Chunk defaults: 12 x 12 figures, no caching, and no warnings or messages
# in the rendered report.
opts_chunk$set(
  fig.width = 12,
  fig.height = 12,
  cache = FALSE,
  warning = FALSE,
  message = FALSE
)
# Project helpers (NOTE(review): presumably this is where the tidyverse
# packages and n_nonmissing() come from - confirm in 0_helpers.R).
source("0_helpers.R")
# Restore the saved workspace objects; the code below expects `diary`.
load("full_data.rdata")
# Derive the analysis variables used in the rest of this script.
diary = diary %>%
  mutate(
    included = included_all,
    # use the backward-inferred estimate wherever the squished one is missing
    fertile = if_else(is.na(prc_stirn_b_squished), prc_stirn_b_backward_inferred, prc_stirn_b_squished),
    # values that are not among the listed levels become NA
    contraceptive_methods = factor(contraceptive_method, levels =
      c("barrier_or_abstinence", "fertility_awareness", "none", "hormonal")),
    relationship_status_clean = factor(relationship_status_clean),
    cohabitation = factor(cohabitation),
    # going through as.character() converts the printed values rather than
    # the internal level codes, should this column be a factor
    certainty_menstruation = as.numeric(as.character(certainty_menstruation))
  ) %>%
  # per-participant average of the fertility estimate
  group_by(person) %>%
  mutate(
    fertile_mean = mean(fertile, na.rm = TRUE)
  ) %>%
  # drop the grouping again so that later steps (binning, column selection,
  # summaries) operate on the whole data set instead of per person
  ungroup()
These fields were stored as text, but can be simplified.
# Recode answers that arrived as text: want_more_info is converted to
# integer, and each yes/no item becomes 1 for "yes" and 0 for any other
# non-missing answer.
diary = diary %>%
  mutate(
    want_more_info = as.integer(want_more_info),
    trying_to_get_pregnant = if_else(
      trying_to_get_pregnant == "yes", 1, 0
    ),
    had_sex_with_partner_yet = if_else(
      had_sex_with_partner_yet == "yes", 1, 0
    ),
    breast_feeding_in_last_3_months = if_else(
      breast_feeding_in_last_3_months == "yes", 1, 0
    ),
    hormonal_medication_in_last_3_months = if_else(
      hormonal_medication_in_last_3_months == "yes", 1, 0
    ),
    pill_in_last_3_months = if_else(
      pill_in_last_3_months == "yes", 1, 0
    )
  )
Free text fields often have unique values and can contain things like IDs, addresses, free-text responses to questions, etc.
# Start with the character columns, since they tend to have the most unique
# values.
diary %>%
  select_if(is.character) %>%
  # long format (one row per variable/value pair) lets a single grouped
  # summarise() cover every column at once
  gather(variable, value) %>%
  group_by(variable) %>%
  # n_nonmissing() is a project helper (presumably the count of non-NA
  # values - confirm); n_dist is the number of distinct values
  summarise(
    n = n_nonmissing(value),
    n_dist = n_distinct(value)
  ) %>%
  # display the result as a table
  pander()
| variable | n | n_dist |
|---|---|---|
| change_contraception_2 | 1057 | 25 |
| children | 30956 | 10 |
| children_broad_categories | 30956 | 2 |
| children_narrow_categories | 30956 | 5 |
| contraception | 30952 | 73 |
| contraceptive_method | 30956 | 5 |
| contraceptive_method_by_pearl | 30956 | 5 |
| contraceptives_broad_categories | 30952 | 11 |
| contraceptives_categories_natural | 29911 | 6 |
| feedback_for_us | 6114 | 176 |
| gestagen_type | 16178 | 12 |
| hormonal_2 | 1716 | 9 |
| hypothesis_guessed | 30956 | 6 |
| illness_2 | 9284 | 167 |
| income_partner | 30956 | 5 |
| living_situation | 30956 | 4 |
| meaning_study | 14437 | 414 |
| medicament_2 | 11349 | 278 |
| method_meeting | 16257 | 10 |
| method_meeting_clean | 16222 | 7 |
| name_hormonal | 195 | 7 |
| occupation | 30956 | 14 |
| occupation_clean | 30850 | 7 |
| other_pill_name | 1531 | 43 |
| pills | 16212 | 72 |
| postal_code | 30956 | 403 |
| pregnant_in_last_3_months | 30956 | 2 |
| relationship_status | 30956 | 15 |
| religion | 30956 | 18 |
| religion_clean | 30881 | 6 |
| sess_day | 30924 | 30925 |
| session | 30956 | 1208 |
| session_id | 30924 | 30925 |
| session_id.abschluss | 26913 | 785 |
| session_id.vorab | 30956 | 1208 |
| sex_orientation | 30956 | 5 |
| short | 30956 | 1208 |
| short_session | 30924 | 1177 |
| special_days | 16985 | 403 |
| special_events | 2836 | 2010 |
# Manual inspection (for me, using View()) is of course important too.
# The same summary for the factor columns shows no rare unique values;
# rows are sorted by the number of distinct values.
diary %>%
  select_if(is.factor) %>%
  gather(variable, value) %>%
  group_by(variable) %>%
  summarise(
    n = n_nonmissing(value),
    n_dist = n_distinct(value)
  ) %>%
  arrange(n_dist) %>%
  pander()
| variable | n | n_dist |
|---|---|---|
| weekend | 30956 | 2 |
| cohabitation | 30956 | 3 |
| cycle_regularity | 30956 | 3 |
| days_with_partner | 30956 | 3 |
| fertile_window | 11682 | 3 |
| fertile_window_backward_inferred | 17013 | 3 |
| fertile_window_forward_counted | 16201 | 3 |
| fertile_window_squished | 11877 | 3 |
| included | 28493 | 3 |
| included_all | 28493 | 3 |
| included_conservative | 14147 | 3 |
| included_lax | 17960 | 3 |
| included_strict | 7682 | 3 |
| menstruation | 30956 | 3 |
| nights_with_partner | 30956 | 3 |
| relationship_status_clean | 30956 | 3 |
| cycle_length_groups | 30956 | 4 |
| estrogen_categories | 26785 | 4 |
| menstruation_strength | 30956 | 4 |
| contraceptive_methods | 30357 | 5 |
| included_levels | 28493 | 5 |
| age_group | 30956 | 6 |
| days_with_partner_per_month | 9538 | 6 |
| income | 13229 | 6 |
| week_number | 30924 | 7 |
| distance_to_partner_hours | 9538 | 8 |
| weekday | 30924 | 8 |
| gestagens | 1497 | 10 |
# Helper to list the character columns as a ready-made select() argument:
# diary %>% select_if(is.character) %>% names() %>% cat(sep=", -")
# Remove every column holding text that might betray someone's identity.
diary = diary %>%
  select(
    -session,
    -session_id,
    -session_id.vorab,
    -short,
    -sess_day,
    -session_id.abschluss,
    -short_session,
    -vpn,
    -session_id.nachbe_other_hormonal,
    -pills,
    -other_pill_name,
    -religion,
    -relationship_status,
    -postal_code,
    -children,
    -method_meeting,
    -meaning_study,
    -medicament_2,
    -special_days,
    -illness_2,
    -change_contraception_2,
    -feedback_for_us,
    -name_hormonal,
    -hypothesis_guessed,
    -children_narrow_categories,
    -special_events
  )
# Same summary for the columns selected by is.instant (date/time columns),
# sorted by the number of distinct values.
diary %>%
  select_if(is.instant) %>%
  gather(variable, value) %>%
  group_by(variable) %>%
  summarise(
    n = n_nonmissing(value),
    n_dist = n_distinct(value)
  ) %>%
  arrange(n_dist) %>%
  pander()
| variable | n | n_dist |
|---|---|---|
| ended.nachbe_other_hormonal | 1716 | 52 |
| created.nachbe_other_hormonal | 1804 | 55 |
| modified.nachbe_other_hormonal | 1804 | 55 |
| first_day | 30924 | 209 |
| first_diary_date | 30924 | 209 |
| cyklus_1 | 30956 | 306 |
| last_menstruation | 27362 | 308 |
| next_menstrual_onset | 18614 | 330 |
| menstrual_onset_date_inferred | 4068 | 363 |
| last_diary_date | 30924 | 371 |
| menstruation_dates | 4805 | 437 |
| next_menstrual_onset_inferred | 30924 | 450 |
| last_menstrual_onset | 30924 | 453 |
| created_date | 30924 | 532 |
| ended.abschluss | 26432 | 770 |
| created.abschluss | 26913 | 785 |
| modified.abschluss | 26913 | 785 |
| created.vorab | 30956 | 1206 |
| modified.vorab | 30956 | 1206 |
| ended.vorab | 30956 | 1207 |
| ended | 30924 | 30582 |
| modified | 30924 | 30795 |
| created | 30924 | 30797 |
# Remove all datetime columns we don't need, and reduce the remaining
# "ended" timestamps to completion flags.
diary = diary %>%
  select(-starts_with("created")) %>%
  select(-starts_with("modified")) %>%
  select(-starts_with("expired")) %>%
  select(
    -first_day,
    -first_diary_date,
    -cyklus_1,
    -last_menstruation,
    -next_menstrual_onset,
    -menstrual_onset_date_inferred,
    -last_diary_date,
    -menstruation_dates,
    -next_menstrual_onset_inferred,
    -last_menstrual_onset
  ) %>%
  # we do want to know whether a survey was finished, but not when:
  # TRUE means an end timestamp was recorded
  mutate(
    ended = !is.na(ended),
    ended.vorab = !is.na(ended.vorab),
    ended.abschluss = !is.na(ended.abschluss),
    ended.nachbe_other_hormonal = !is.na(ended.nachbe_other_hormonal)
  )
A similar problem exists for intervals, but we might need these, and I can easily round them. Unfortunately, I didn’t store them as type interval, so I have to look through all numeric columns instead.
# diary %>% select_if(is.interval) %>%
# gather(variable, value) %>%
# group_by(variable) %>%
# summarise(n = n_nonmissing(value), n_dist = n_distinct(value)) %>%
# arrange(n_dist) %>%
# pander()
The Hmisc::cut2 function lets me split a variable into g groups while trying to ensure that no group contains fewer than m values.
# Replace exact timing/response values by coarse bins: g is the number of
# quantile groups requested from Hmisc::cut2 and m the minimum number of
# observations it aims for per group.
diary = diary %>%
  mutate(
    time_for_response = Hmisc::cut2(time_for_response, g = 10, m = 30),
    time_of_response = Hmisc::cut2(time_of_response, g = 10, m = 30),
    # g (number of groups), not cuts: `cuts = 10` would place a single break
    # at the value 10 instead of forming 10 quantile groups
    time_since_last_response = Hmisc::cut2(time_since_last_response, g = 10, m = 30),
    biggest_diff = Hmisc::cut2(biggest_diff, g = 10, m = 30),
    # NOTE(review): the next two use a larger minimum group size (m = 300)
    # than the variables above - confirm this is intended
    avg_diff = Hmisc::cut2(avg_diff, g = 10, m = 300),
    days_responded_percentage = Hmisc::cut2(days_responded_percentage, g = 10, m = 300)
  )
Although rare values won’t allow anyone to identify many participants, it is important to protect privacy for all participants. In our case especially, a potential attacker might know that e.g. his girlfriend uses a pill with a rare gestagen and identify her like this.
To this end, I wrote the function below. Depending on its last argument, it returns either the rarest value or how frequent that value is. I included the group variable because a lot of demographic data is duplicated across rows in my diary data, so each value is counted only once per group.
# Find the rarest value of x, counting each value at most once per group.
#
# x:          vector of values
# group:      vector of the same length as x; repeated (x, group) pairs are
#             collapsed before counting, so a value repeated within one
#             group only counts once
# give_value: if TRUE, return the rarest value itself (as a character
#             string); if FALSE (default), return how many groups have it
#
# Missing values in x are not counted. Ties go to the first value in table
# order. If x has no non-missing values, NA is returned instead of a
# zero-length result, so the function always yields exactly one element.
commonness_rarest_value = function(x, group, give_value = FALSE) {
  pairs = unique(data.frame(x = x, group = group))
  counts = table(pairs$x)
  if (length(counts) == 0) {
    # nothing to count (empty or all-NA input)
    return(if (give_value) NA_character_ else NA_integer_)
  }
  rarest = counts[which.min(counts)]
  if (give_value) names(rarest) else rarest
}
# For every column except person: the rarest value (lc_val) and the number
# of participants who have it (lc_freq), next to the usual counts.
diary %>%
  gather(variable, value, -person) %>%
  group_by(variable) %>%
  summarise(
    n = n_nonmissing(value),
    n_dist = n_distinct(value),
    lc_val = commonness_rarest_value(value, person, TRUE),
    lc_freq = commonness_rarest_value(value, person, FALSE)
  ) %>%
  # descending by lc_freq / (n_dist + lc_freq), which puts the variables
  # with the rarest values at the bottom
  arrange(desc(lc_freq / (n_dist + lc_freq))) %>%
  pander()
| variable | n | n_dist | lc_val | lc_freq |
|---|---|---|---|---|
| ended.vorab | 30956 | 1 | TRUE | 1208 |
| gender | 30956 | 1 | 1 | 1208 |
| hetero_relationship | 30956 | 1 | 1 | 1208 |
| weekend | 30956 | 2 | TRUE | 1122 |
| time_since_last_response | 23275 | 2 | [10,31] | 1097 |
| menstruated_at_all | 30956 | 2 | TRUE | 910 |
| in_pair_public_intimacy | 29902 | 3 | 0 | 1081 |
| mate_retention_1 | 29902 | 3 | 2 | 960 |
| spent_night_with_partner | 29902 | 3 | 0 | 960 |
| fertile_broad_backward_inferred | 17013 | 3 | 0.0533333333333333 | 959 |
| had_sexual_intercourse | 29902 | 3 | 1 | 945 |
| premenstrual_phase_backward_inferred | 27143 | 3 | TRUE | 929 |
| premenstrual_phase_fab | 27143 | 3 | TRUE | 929 |
| fertile_broad_forward_counted | 16201 | 3 | 0.0533333333333333 | 928 |
| fertile_window_forward_counted | 16201 | 3 | broad | 919 |
| fertile_window_backward_inferred | 17013 | 3 | broad | 915 |
| menstruation_1 | 29901 | 3 | 1 | 910 |
| fertile_narrow_forward_counted | 12407 | 3 | 0.51 | 908 |
| fertile_narrow_backward_inferred | 13345 | 3 | 0.51 | 905 |
| had_petting | 29901 | 3 | 1 | 904 |
| pill_contraception | 30956 | 2 | 0 | 598 |
| premenstrual_phase_forward_counted | 27380 | 3 | TRUE | 873 |
| contraception_meeting_partner | 30956 | 2 | 2 | 581 |
| pill_in_last_3_months | 30956 | 2 | 0 | 566 |
| pille_control | 30956 | 2 | 2 | 566 |
| menstruation | 30956 | 3 | pre | 819 |
| premenstrual_phase | 17887 | 3 | TRUE | 819 |
| premenstrual_phase_squished | 17944 | 3 | TRUE | 807 |
| sufficient_diary_coverage | 30956 | 2 | FALSE | 518 |
| partner_initiated_sexual_intercourse | 6512 | 3 | 0 | 772 |
| sexual_intercourse_3 | 6512 | 3 | 1 | 772 |
| hormonal_contraception | 30956 | 2 | 0 | 514 |
| menstrual_onset | 18614 | 3 | TRUE | 726 |
| fertile_broad_squished | 11877 | 3 | 0.44 | 697 |
| fertile_broad | 11682 | 3 | 0.44 | 694 |
| fertile_window_squished | 11877 | 3 | broad | 683 |
| fertile_window | 11682 | 3 | broad | 675 |
| fertile_narrow | 9648 | 3 | 0.51 | 670 |
| fertile_narrow_squished | 9702 | 3 | 0.51 | 667 |
| ended.abschluss | 30956 | 2 | FALSE | 439 |
| hormonal_contra | 30952 | 3 | TRUE | 602 |
| hormonal_all | 30952 | 3 | FALSE | 585 |
| mate_retention_2 | 29902 | 4 | 1 | 758 |
| dodgy_data | 30924 | 3 | TRUE | 566 |
| any_RCD | 30956 | 2 | FALSE | 368 |
| SJS_5 | 30956 | 2 | 2 | 357 |
| SJS_5R | 30956 | 2 | 1 | 357 |
| hormonal | 30952 | 3 | FALSE | 514 |
| SJS_6 | 30956 | 2 | 2 | 312 |
| ever_menstruated | 30956 | 2 | FALSE | 298 |
| had_any_period | 30956 | 2 | FALSE | 298 |
| sexual_intercourse_2 | 29902 | 4 | 2 | 595 |
| SJS_4 | 30956 | 2 | 2 | 286 |
| sexual_intercourse_5 | 29901 | 4 | 2 | 567 |
| include_all | 30952 | 3 | TRUE | 421 |
| included | 28493 | 3 | cycling | 421 |
| included_all | 28493 | 3 | cycling | 421 |
| antibiotics | 30956 | 2 | 1 | 279 |
| hormonal_medication_in_last_3_months | 30956 | 2 | 1 | 279 |
| weekday | 30924 | 8 | Saturday | 1034 |
| hormonal_lax | 30952 | 3 | TRUE | 374 |
| week_number | 30924 | 7 | (28,35] | 833 |
| stress | 26606 | 3 | 1 | 355 |
| days_with_partner | 30956 | 3 | 3-5 days | 346 |
| cohabitation | 30956 | 3 | Live in same city | 334 |
| long_distance_relationship | 30956 | 3 | 2 | 334 |
| sexual_intercourse_1 | 29902 | 6 | 1 | 663 |
| sexual_intercourse_1_6scale | 29902 | 6 | 1.2 | 663 |
| medicament_1 | 26606 | 3 | 1 | 330 |
| nights_with_partner | 30956 | 3 | 3-5 nights | 329 |
| attention_2 | 29874 | 7 | 6 | 766 |
| SJS_3 | 30956 | 2 | 2 | 213 |
| SJS_3R | 30956 | 2 | 1 | 213 |
| male_attention_1 | 29874 | 7 | 2 | 739 |
| situation_of_living | 18107 | 3 | 1 | 294 |
| hormonal_conservative | 30940 | 3 | TRUE | 293 |
| menstruation_3 | 4794 | 4 | 3 | 390 |
| menstruation_strength | 30956 | 4 | 3 | 390 |
| days_since_menstrual_onset | 4068 | 6 | 4 | 571 |
| SJS_1 | 30956 | 2 | 2 | 188 |
| illness_1 | 26606 | 3 | 1 | 271 |
| children_broad_categories | 30956 | 2 | children | 180 |
| mate_retention_3 | 29876 | 7 | 6 | 594 |
| desirability_1 | 29881 | 7 | 1 | 562 |
| choice_of_clothing_3 | 29887 | 7 | 1 | 554 |
| communication_partner_1 | 29902 | 4 | 1 | 311 |
| choice_of_clothing_6 | 29883 | 7 | 6 | 541 |
| desirability_partner | 29879 | 7 | 1 | 537 |
| time_of_response | 30924 | 11 | [17.0,17.3) | 835 |
| any_fertile_days_known | 30956 | 2 | FALSE | 147 |
| attention_1 | 29874 | 7 | 2 | 510 |
| jealousy_1 | 29877 | 7 | 6 | 463 |
| extra_pair_3 | 29873 | 7 | 6 | 451 |
| cycle_regularity | 30956 | 3 | irregular, more than 5 days off | 192 |
| cyklus_5 | 30956 | 3 | 3 | 192 |
| extra_pair_6 | 29873 | 7 | 2 | 445 |
| male_mate_retention_1 | 29876 | 7 | 6 | 444 |
| choice_of_clothing_1 | 29896 | 7 | 6 | 431 |
| mate_retention_4 | 29876 | 7 | 2 | 427 |
| male_mate_retention_2 | 29876 | 7 | 2 | 418 |
| mate_retention_6 | 29875 | 7 | 6 | 417 |
| flat_share | 10737 | 3 | 1 | 173 |
| extra_pair_12 | 29873 | 7 | 6 | 389 |
| hormonal_strict | 30940 | 3 | TRUE | 161 |
| SJS_2 | 30956 | 2 | 2 | 107 |
| SJS_2R | 30956 | 2 | 1 | 107 |
| menstruation_2 | 4794 | 7 | 6 | 368 |
| extra_pair_5 | 29873 | 7 | 2 | 348 |
| choice_of_clothing_4 | 29884 | 7 | 6 | 343 |
| time_for_response | 29143 | 11 | [0.10, 2.15) | 531 |
| extra_pair_2 | 29874 | 7 | 6 | 336 |
| we_know_fertile_days | 30956 | 2 | FALSE | 96 |
| include_lax | 30952 | 3 | TRUE | 143 |
| included_lax | 17960 | 3 | cycling | 143 |
| self_esteem_1 | 29881 | 7 | 1 | 309 |
| BFI_consc_9 | 30956 | 5 | 3 | 219 |
| BFI_consc_9R | 30956 | 5 | 3 | 219 |
| living_situation | 30956 | 4 | living in all-female flatshare | 173 |
| choice_of_clothing_8 | 29882 | 7 | 6 | 291 |
| showy_clothes | 29882 | 7 | 6 | 291 |
| extra_pair_1 | 29867 | 3 | 1 | 124 |
| extra_pair_intimacy | 29867 | 3 | 1 | 124 |
| NARQ_admiration_3 | 29877 | 7 | 6 | 289 |
| estrogen_categories | 26785 | 4 | (300,600] | 153 |
| mate_retention_5 | 29876 | 7 | 6 | 265 |
| include_conservative | 30952 | 3 | TRUE | 112 |
| included_conservative | 14147 | 3 | cycling | 112 |
| SOI_R_6 | 30956 | 5 | 2 | 183 |
| SOI_R_6R | 30956 | 5 | 4 | 183 |
| prc_stirn_b_forward_counted | 27380 | 20 | 0.14 | 687 |
| fertile_fab | 27143 | 20 | 0.05 | 679 |
| fertile_forward_and_backward | 27143 | 20 | 0.05 | 679 |
| prc_stirn_b_backward_inferred | 27143 | 20 | 0.05 | 679 |
| SOI_R_5 | 30956 | 5 | 3 | 169 |
| extra_pair_13 | 29873 | 7 | 6 | 234 |
| extra_pair_sexual_fantasies | 29873 | 7 | 6 | 234 |
| choice_of_clothing_2 | 29892 | 7 | 1 | 233 |
| MV_2 | 30956 | 5 | 1 | 162 |
| attention | 29874 | 12 | 1.5 | 382 |
| choice_of_clothing_7 | 29881 | 7 | 6 | 222 |
| fertile | 27828 | 20 | 0.16 | 619 |
| attractiveness_finance_2 | 30956 | 5 | 5 | 152 |
| income_partner | 30956 | 5 | > 3000€ | 152 |
| NARQ_admiration_2 | 29877 | 7 | 6 | 211 |
| SOI_R_4 | 30956 | 5 | 1 | 148 |
| NARQ_admiration_1 | 29877 | 7 | 6 | 197 |
| BFI_open_10 | 30956 | 5 | 1 | 136 |
| choice_of_clothing_5 | 29885 | 7 | 6 | 186 |
| extra_pair_7 | 29873 | 7 | 6 | 186 |
| had_sex_with_partner_yet | 30956 | 2 | 0 | 53 |
| male_mate_retention | 29876 | 12 | 6 | 314 |
| relationship_satisfaction_1 | 29902 | 8 | 4.5 | 208 |
| extra_pair_9 | 29873 | 7 | 6 | 179 |
| ended.nachbe_other_hormonal | 30956 | 2 | TRUE | 51 |
| extra_pair_10 | 29873 | 7 | 6 | 178 |
| BFI_agree_6 | 30956 | 5 | 1 | 126 |
| BFI_agree_6R | 30956 | 5 | 5 | 126 |
| BFI_extra_7 | 30956 | 5 | 5 | 126 |
| BFI_extra_7R | 30956 | 5 | 1 | 126 |
| trying_to_get_pregnant | 30956 | 2 | 1 | 50 |
| prc_wcx_b_forward_counted | 27380 | 26 | 0.006 | 624 |
| CJS_6 | 30956 | 5 | 5 | 119 |
| prc_wcx_b_backward_inferred | 27143 | 26 | 0.001 | 603 |
| BFI_agree_4 | 30956 | 5 | 1 | 113 |
| NARQ_1 | 30956 | 5 | 5 | 112 |
| BFI_consc_4 | 30956 | 5 | 1 | 111 |
| BFI_consc_4R | 30956 | 5 | 5 | 111 |
| included_levels | 28493 | 5 | lax | 111 |
| SGSE_5 | 30956 | 5 | 5 | 111 |
| BFI_neuro_7 | 30956 | 5 | 5 | 108 |
| RJS_1 | 30956 | 5 | 5 | 108 |
| CJS_3 | 30956 | 5 | 1 | 106 |
| MV_P_2 | 30956 | 5 | 1 | 105 |
| SGSE_4 | 30956 | 5 | 5 | 104 |
| SGSE_4R | 30956 | 5 | 1 | 104 |
| extra_pair_going_out | 29873 | 12 | 5.5 | 242 |
| male_jealousy_1 | 29876 | 7 | 6 | 141 |
| male_jealousy_2 | 29877 | 7 | 6 | 138 |
| BFI_neuro_8 | 30956 | 5 | 1 | 98 |
| NARQ_6 | 30956 | 5 | 5 | 98 |
| SOI_R_3 | 30956 | 5 | 5 | 97 |
| cyklus_4 | 30956 | 5 | 1 | 95 |
| include_strict | 30952 | 3 | TRUE | 57 |
| included_bool | 7682 | 3 | TRUE | 57 |
| included_strict | 7682 | 3 | cycling | 57 |
| extra_pair_1b | 399 | 3 | 1 | 54 |
| extra_pair_sex | 30924 | 3 | 1 | 54 |
| fertile_cont | 17887 | 20 | 0.05 | 360 |
| prc_stirn_b | 17887 | 20 | 0.05 | 360 |
| extra_pair_8 | 29873 | 7 | 6 | 125 |
| RJS_3 | 30956 | 5 | 5 | 89 |
| BFI_neuro_6 | 30956 | 5 | 1 | 87 |
| BFI_neuro_6R | 30956 | 5 | 5 | 87 |
| relationship_status_clean | 30956 | 3 | Verlobt | 52 |
| BFI_agree_1 | 30956 | 5 | 5 | 86 |
| BFI_agree_1R | 30956 | 5 | 1 | 86 |
| BFI_consc_8 | 30956 | 5 | 1 | 85 |
| BFI_consc_8R | 30956 | 5 | 5 | 85 |
| BFI_neuro_5 | 30956 | 5 | 5 | 85 |
| BFI_neuro_5R | 30956 | 5 | 1 | 85 |
| BFI_extra_2 | 30956 | 5 | 1 | 84 |
| BFI_extra_2R | 30956 | 5 | 5 | 84 |
| MV_P_5 | 30956 | 5 | 1 | 84 |
| MV_P_5R | 30956 | 5 | 5 | 84 |
| extra_pair_4 | 29873 | 7 | 6 | 116 |
| BFI_agree_8 | 30956 | 5 | 1 | 82 |
| BFI_agree_8R | 30956 | 5 | 5 | 82 |
| BFI_neuro_3 | 30956 | 5 | 1 | 81 |
| SGSE_1 | 30956 | 5 | 5 | 81 |
| ended | 30956 | 2 | FALSE | 32 |
| attractiveness_stp_self | 30956 | 5 | 1 | 79 |
| CJS_2 | 30956 | 5 | 5 | 79 |
| attractiveness_finance_1 | 30956 | 5 | 5 | 78 |
| extra_pair_compliments | 29873 | 12 | 6 | 187 |
| male_jealousy_3 | 29876 | 7 | 6 | 109 |
| SGSE_3 | 30956 | 5 | 5 | 76 |
| SGSE_3R | 30956 | 5 | 1 | 76 |
| attractiveness_stp | 30956 | 5 | 1 | 75 |
| NARQ_7 | 30956 | 5 | 1 | 75 |
| extra_pair_11 | 29873 | 7 | 6 | 102 |
| NARQ_rivalry_1 | 29876 | 7 | 6 | 100 |
| MV_5 | 30956 | 5 | 1 | 71 |
| MV_5R | 30956 | 5 | 5 | 71 |
| NARQ_3 | 30956 | 5 | 5 | 71 |
| SOI_R_7 | 30956 | 5 | 5 | 70 |
| NARQ_rivalry_2 | 29877 | 7 | 6 | 97 |
| BFI_neuro_2 | 30956 | 5 | 1 | 69 |
| BFI_neuro_2R | 30956 | 5 | 5 | 69 |
| prc_stirn_b_squished | 17944 | 20 | 0.16 | 271 |
| has_not_had_sex_yet | 30956 | 2 | 1 | 27 |
| BFI_neuro_1 | 30956 | 5 | 5 | 67 |
| cyklus_2 | 30956 | 5 | 2 | 66 |
| pregnant | 30956 | 2 | 1 | 26 |
| pregnant_in_last_3_months | 30956 | 2 | yes | 26 |
| NARQ_15 | 30956 | 5 | 5 | 64 |
| NARQ_16 | 30956 | 5 | 5 | 62 |
| cycle_length_groups | 30956 | 4 | (35,41] | 49 |
| BFI_extra_8 | 30956 | 5 | 1 | 61 |
| contraceptive_methods | 30357 | 5 | fertility_awareness | 61 |
| SOI_R_2 | 30956 | 5 | 5 | 60 |
| RJS_5 | 30956 | 5 | 5 | 59 |
| BFI_open_7 | 30956 | 5 | 1 | 58 |
| BFI_open_7R | 30956 | 5 | 5 | 58 |
| RJS_6 | 30956 | 5 | 5 | 58 |
| BFI_open_5 | 30956 | 5 | 1 | 56 |
| ZIP_7 | 30956 | 5 | 1 | 56 |
| ZIP_7R | 30956 | 5 | 5 | 56 |
| NARQ_11 | 30956 | 5 | 1 | 55 |
| want_more_info | 26606 | 3 | 0 | 33 |
| prc_wcx_b | 17887 | 26 | 0 | 277 |
| prc_wcx_b_squished | 17944 | 26 | 0.018 | 271 |
| change_contraception_1 | 26606 | 3 | 1 | 31 |
| BFI_open_9 | 30956 | 5 | 1 | 51 |
| BFI_open_9R | 30956 | 5 | 5 | 51 |
| CJS_1 | 30956 | 5 | 5 | 51 |
| attractiveness_relativ_1 | 30956 | 4 | 4 | 40 |
| breast_feeding | 30956 | 2 | 1 | 20 |
| breast_feeding_in_last_3_months | 30956 | 2 | 1 | 20 |
| SGSE_2 | 30956 | 5 | 5 | 49 |
| BFI_open_1 | 30956 | 5 | 1 | 47 |
| age_group | 30956 | 6 | (45,70] | 56 |
| BFI_extra_6 | 30956 | 5 | 1 | 45 |
| BFI_open_6 | 30956 | 5 | 1 | 44 |
| biggest_diff | 30956 | 10 | [ 2.04, 2.13) | 88 |
| NARQ_18 | 30956 | 5 | 1 | 44 |
| NARQ_9 | 30956 | 5 | 5 | 44 |
| BFI_consc_2 | 30956 | 5 | 1 | 43 |
| BFI_consc_2R | 30956 | 5 | 5 | 43 |
| NARQ_5 | 30956 | 5 | 1 | 43 |
| MV_P_1 | 30956 | 5 | 5 | 42 |
| attractiveness_finance_self | 13229 | 6 | 5 | 49 |
| income | 13229 | 6 | > 3000€ | 49 |
| MV_4 | 30956 | 5 | 1 | 40 |
| avg_diff | 30885 | 11 | [1.055, 1.09) | 84 |
| BFI_extra_5 | 30956 | 5 | 1 | 38 |
| BFI_extra_5R | 30956 | 5 | 5 | 38 |
| sexy_clothes | 30924 | 18 | 6 | 133 |
| breakup | 26606 | 3 | 1 | 22 |
| communication_partner_2 | 28857 | 8 | 6 | 58 |
| BFI_extra_4 | 30956 | 5 | 1 | 36 |
| BFI_extra_3 | 30956 | 5 | 1 | 34 |
| BFI_neuro_4 | 30956 | 5 | 1 | 34 |
| NARQ_rivalry_3 | 29876 | 7 | 6 | 46 |
| BFI_agree_5 | 30956 | 5 | 1 | 32 |
| SOI_R_8 | 30956 | 5 | 5 | 32 |
| religiosity | 30956 | 5 | 5 | 31 |
| RJS_4 | 30956 | 5 | 5 | 31 |
| days_responded_percentage | 30885 | 11 | [0.95122,0.976) | 67 |
| CJS_5 | 30956 | 5 | 5 | 30 |
| NARQ_8 | 30956 | 5 | 5 | 30 |
| RJS_2 | 30956 | 5 | 5 | 30 |
| BFI_consc_5 | 30956 | 5 | 1 | 29 |
| certainty_menstruation | 30956 | 5 | 1 | 29 |
| cyklus_6 | 30956 | 5 | 2 | 29 |
| SOI_R_9 | 30956 | 5 | 5 | 29 |
| attractiveness_occupation | 30956 | 5 | 1 | 28 |
| contraceptive_method | 30956 | 5 | other | 27 |
| MV_P_4 | 30956 | 5 | 1 | 27 |
| NARQ_admiration | 29877 | 17 | 6 | 88 |
| SOI_uv | 30956 | 13 | 1.33333333333333 | 67 |
| SJS | 30956 | 7 | 2 | 36 |
| BFI_open_8 | 30956 | 5 | 1 | 25 |
| sexual_intercourse_satisfaction | 6511 | 8 | 1 | 38 |
| BFI_consc_6 | 30956 | 5 | 1 | 23 |
| contraceptive_method_by_pearl | 30956 | 5 | fertility_awareness | 23 |
| attractiveness_finance | 30956 | 9 | 5 | 41 |
| CJS_4 | 30956 | 5 | 5 | 22 |
| MV_1 | 30956 | 5 | 5 | 22 |
| BFI_extra_1 | 30956 | 5 | 1 | 21 |
| NARQ_4 | 30956 | 5 | 5 | 21 |
| BFI_agree_9 | 30956 | 5 | 1 | 20 |
| BFI_open_4 | 30956 | 5 | 1 | 20 |
| NARQ_10 | 30956 | 5 | 5 | 19 |
| ZIP_5 | 30956 | 5 | 1 | 19 |
| BFI_agree_3 | 30956 | 5 | 1 | 18 |
| BFI_agree_3R | 30956 | 5 | 5 | 18 |
| BFI_consc_7 | 30956 | 5 | 1 | 18 |
| no_variation | 29891 | 3 | TRUE | 10 |
| BFI_consc_1 | 30956 | 5 | 1 | 16 |
| MV_3 | 30956 | 5 | 1 | 15 |
| NARQ_12 | 30956 | 5 | 5 | 15 |
| NARQ_13 | 30956 | 5 | 5 | 15 |
| ZIP_2 | 30956 | 5 | 1 | 15 |
| ZIP_4 | 30956 | 5 | 1 | 14 |
| ZIP_4R | 30956 | 5 | 5 | 14 |
| new_relationship | 26606 | 3 | 1 | 8 |
| BFI_agree_2 | 30956 | 5 | 1 | 13 |
| MV_P_3 | 30956 | 5 | 1 | 13 |
| satisfaction_sexual_intercourse | 29562 | 6 | 1 | 15 |
| days_with_partner_per_month | 9538 | 6 | > 14 days | 14 |
| duration_pill | 16212 | 6 | 1 | 14 |
| long_distance_relationship_2 | 9538 | 6 | 5 | 14 |
| BFI_consc_3 | 30956 | 5 | 1 | 11 |
| ZIP_3 | 30956 | 5 | 1 | 11 |
| BFI_agree_7 | 30956 | 5 | 1 | 10 |
| occupation_clean | 30850 | 7 | Nicht berufstätig | 13 |
| NARQ_2 | 30956 | 5 | 5 | 9 |
| SOI_R_1 | 30956 | 5 | 5 | 9 |
| sexy | 30956 | 5 | 1 | 8 |
| attractiveness_face | 30956 | 5 | 1 | 7 |
| attractiveness_overall | 30956 | 5 | 1 | 7 |
| BFI_open_2 | 30956 | 5 | 1 | 7 |
| BFI_open_3 | 30956 | 5 | 1 | 7 |
| NARQ_17 | 30956 | 5 | 5 | 7 |
| ZIP_1 | 30956 | 5 | 1 | 7 |
| ZIP_6 | 30956 | 5 | 1 | 7 |
| partner_mate_retention | 29874 | 22 | 6 | 30 |
| number_of_cycles | 30924 | 6 | 5 | 8 |
| RCD_squished | 18018 | 31 | 0 | 41 |
| attractiveness_ltp | 30956 | 5 | 1 | 6 |
| partner_attractiveness_longterm | 30956 | 5 | -3.97818492731748 | 6 |
| cycle_nr | 30924 | 6 | 5 | 7 |
| duration_relationship_months | 30956 | 13 | 12 | 15 |
| attractiveness_ltp_self | 30956 | 5 | 1 | 5 |
| contraceptives_broad_categories | 30952 | 11 | partner_sterilised | 11 |
| distance_partner | 9538 | 8 | 6 | 8 |
| distance_to_partner_hours | 9538 | 8 | 9-12h | 8 |
| partner_attractiveness_money | 30956 | 13 | 1 | 12 |
| attractiveness_body | 30956 | 5 | 1 | 4 |
| SOI_im | 30956 | 13 | 5 | 9 |
| religion_clean | 30881 | 6 | Judentum | 4 |
| MV_short | 30956 | 13 | 1 | 8 |
| NARQ_14 | 30956 | 5 | 5 | 3 |
| gestagen_ug | 2 | 2 | 250 | 1 |
| male_jealousy | 29876 | 17 | 5.33333333333333 | 8 |
| NARQ_rivalry | 29876 | 17 | 5.66666666666667 | 8 |
| method_meeting_clean | 16222 | 7 | Depotspritze | 3 |
| extra_pair_flirting | 29873 | 17 | 5.66666666666667 | 7 |
| MV | 30956 | 17 | 1.8 | 7 |
| SOI_be | 30956 | 13 | 4.66666666666667 | 5 |
| SGSE | 30956 | 21 | 4.8 | 8 |
| contraceptives_categories_natural | 29911 | 6 | infertile | 2 |
| estrogen | 195 | 3 | 30 | 1 |
| estrogen_ug | 1497 | 6 | 15 | 2 |
| estrogen_ug_merged | 2157 | 6 | 15 | 2 |
| female_jealousy | 30924 | 18 | 5.66666666666667 | 6 |
| gestagen | 195 | 3 | 13 | 1 |
| hormonal_1 | 1716 | 6 | 4 | 2 |
| extra_pair_desire | 29873 | 27 | 5.8 | 7 |
| estrogen_ug_other | 1315 | 4 | 420 | 1 |
| partner_attractiveness_shortterm | 30956 | 16 | -3.34512824374663 | 4 |
| MV_P_short | 30956 | 13 | 1 | 3 |
| partner_attractiveness_physical | 30956 | 9 | 2 | 2 |
| sex_orientation | 30956 | 5 | pansexuell | 1 |
| RJS | 30956 | 25 | 4.83333333333333 | 4 |
| hormonal_2 | 1716 | 9 | circlet | 1 |
| gestagens | 1497 | 10 | NGT | 1 |
| gestagens_ug | 1497 | 10 | 125 | 1 |
| estrogen_ug_all | 26785 | 11 | 360 | 1 |
| estrogen_ug_cycle | 17120 | 12 | 360 | 1 |
| gestagen_type | 16178 | 12 | NES | 1 |
| gestagen_ug_merged | 15610 | 12 | 1500 | 1 |
| menarche | 13225 | 12 | 19 | 1 |
| in_pair_desire | 29874 | 56 | 4.4 | 4 |
| occupation | 30956 | 14 | Beamtin | 1 |
| BFI_neuro | 30956 | 32 | 1.125 | 2 |
| gestagen_cycle | 14567 | 16 | 1.44 | 1 |
| intensive_sports | 26606 | 16 | 14 | 1 |
| SOI_R | 30956 | 35 | 4.22222222222222 | 2 |
| MV_P | 30956 | 18 | 4.8 | 1 |
| cigarettes | 26606 | 19 | 11 | 1 |
| choice_of_clothing | 29881 | 41 | 5.5 | 2 |
| cycle_length | 30956 | 21 | 39 | 1 |
| cyklus_3 | 30956 | 21 | 39 | 1 |
| day_count | 30956 | 42 | 42 | 2 |
| days | 30956 | 42 | 25 | 2 |
| fertile_days_known_backward | 30956 | 42 | 41 | 2 |
| first_time | 29943 | 21 | 10 | 1 |
| MV_diff | 30956 | 24 | -3.66666666666667 | 1 |
| CJS | 30956 | 25 | 5 | 1 |
| ZIP | 30956 | 27 | 1.28571428571429 | 1 |
| partner_attractiveness_global | 30956 | 29 | -2.95278253338198 | 1 |
| female_mate_retention | 30924 | 30 | 3.33333333333333 | 1 |
| BFI_extra | 30956 | 31 | 1.125 | 1 |
| BFI_agree | 30956 | 32 | 1.33333333333333 | 1 |
| NARQ_K_total | 29876 | 32 | 5.83333333333333 | 1 |
| BFI_open | 30956 | 33 | 1.5 | 1 |
| BFI_consc | 30956 | 34 | 1.22222222222222 | 1 |
| age | 30956 | 42 | 54 | 1 |
| fertile_days_known_backward_inferred | 30956 | 43 | 42 | 1 |
| fertile_days_known_forward | 30956 | 43 | 42 | 1 |
| n_days | 30956 | 43 | 42 | 1 |
| height | 30956 | 46 | 116 | 1 |
| number_sexual_partner | 30956 | 47 | 100 | 1 |
| NARQ | 30956 | 56 | 1 | 1 |
| extra_pair | 29873 | 62 | 5.75 | 1 |
| partner_attractiveness_rel_to_self | 30956 | 65 | -0.162454954632523 | 1 |
| weight_post | 26606 | 68 | 102 | 1 |
| contraception | 30952 | 73 | coitus_interruptus, temperature_billings | 1 |
| weight | 30956 | 79 | 101 | 1 |
| last_lag | 27362 | 95 | 107 | 1 |
| cycle_length_diary | 18018 | 111 | 101 | 1 |
| median_cycle_length_diary | 26728 | 127 | 10 | 1 |
| estrogen_ug_per_kg | 17120 | 143 | 10.2083333333333 | 1 |
| attractiveness_income_age_corrected | 30956 | 148 | -0.049531448419045 | 1 |
| mean_cycle_length_diary | 30924 | 155 | 10 | 1 |
| timespan | 30924 | 181 | 101 | 1 |
| took_days | 30924 | 181 | 101 | 1 |
| minimum_cycle_length_diary | 30924 | 197 | 109 | 1 |
| duration_relationship_total | 30820 | 206 | 110 | 1 |
| duration_relationship_years | 30820 | 206 | 10.25 | 1 |
| menstrual_onset_days_until | 18614 | 219 | -105 | 1 |
| RCD | 18614 | 219 | -105 | 1 |
| RCD_for_merge | 18614 | 219 | 106 | 1 |
| RCD_rel_to_ovulation | 18614 | 219 | -101 | 1 |
| FCD | 30924 | 270 | 123 | 1 |
| menstrual_onset_days_since | 30924 | 270 | 122 | 1 |
| day_number | 30924 | 314 | 159 | 1 |
| RCD_inferred | 30924 | 446 | -106 | 1 |
| BMI | 26606 | 568 | 10.8108108108108 | 1 |
| fertile_mean | 30956 | 888 | 0.0111111111111111 | 1 |
# drop
# The month/year durations are redundant with the total duration, age_group
# is used instead of age, and sex_orientation has overly rare categories.
diary = diary %>%
  select(
    -duration_relationship_months,
    -duration_relationship_years,
    -age,
    -sex_orientation
  )
## round/group
# Coarsen the total relationship duration: divide by 12, then round with
# round_any() to an accuracy of 0.3.
diary = diary %>%
  mutate(
    duration_relationship_total = round_any(
      duration_relationship_total / 12,
      0.3
    )
    # Left disabled - rounding would be nice but leaves extreme values
    # out there:
    # height = round_any(height, 5),
    # weight = round_any(weight, 5),
    # BMI = round_any(BMI, 2),
    # Also disabled:
    # first_time = round_any(first_time, 3)
  ) %>%
  select(
    # derived factors from weight can allow people to recompute weight
    -estrogen_ug_per_kg
  )
## cap
# Blank out values below a lower bound.
# x is converted with as.numeric(); entries smaller than lt are returned as
# NA, all other entries are returned as they are.
na_if_lt = function(x, lt) {
  values = as.numeric(x)
  too_small = values < lt
  if_else(too_small, NA_real_, values)
}
# Blank out values above an upper bound.
# x is converted with as.numeric(); entries larger than gt are returned as
# NA, all other entries are returned as they are.
na_if_gt = function(x, gt) {
  values = as.numeric(x)
  too_large = values > gt
  if_else(too_large, NA_real_, values)
}
# Cap extreme values: anything beyond the given bound is set to NA, so that
# unusually long cycles, spans or counts cannot single out a participant.
diary = diary %>%
  mutate(
    # day counts relative to menstrual onset / ovulation
    RCD = na_if_lt(RCD, -40),
    RCD_rel_to_ovulation = na_if_lt(RCD_rel_to_ovulation, -20),
    # bounded on both sides: below -40 and above 30 become NA
    RCD_inferred = na_if_gt(na_if_lt(RCD_inferred, -40), 30),
    RCD_for_merge = na_if_gt(RCD_for_merge, 50),
    FCD = na_if_gt(FCD, 40),
    day_number = na_if_gt(day_number, 40),
    menstrual_onset_days_since = na_if_gt(menstrual_onset_days_since, 40),
    menstrual_onset_days_until = na_if_lt(menstrual_onset_days_until, -40),
    # cycle-length summaries: each column is capped on its own values
    minimum_cycle_length_diary = na_if_gt(minimum_cycle_length_diary, 40),
    took_days = na_if_gt(took_days, 40),
    timespan = na_if_gt(timespan, 40),
    mean_cycle_length_diary = na_if_gt(mean_cycle_length_diary, 40),
    median_cycle_length_diary = na_if_gt(median_cycle_length_diary, 40),
    cycle_length_diary = na_if_gt(cycle_length_diary, 40),
    last_lag = na_if_gt(last_lag, 40),
    number_sexual_partner = na_if_gt(number_sexual_partner, 30)
  )
# Re-run the rarity check: for every column except person, the rarest value
# (lc_val) and the number of participants who have it (lc_freq).
diary %>%
  gather(variable, value, -person) %>%
  group_by(variable) %>%
  summarise(
    n = n_nonmissing(value),
    n_dist = n_distinct(value),
    lc_val = commonness_rarest_value(value, person, TRUE),
    lc_freq = commonness_rarest_value(value, person, FALSE)
  ) %>%
  # descending by lc_freq / (n_dist + lc_freq), which puts the variables
  # with the rarest values at the bottom
  arrange(desc(lc_freq / (n_dist + lc_freq))) %>%
  pander()
| variable | n | n_dist | lc_val | lc_freq |
|---|---|---|---|---|
| ended.vorab | 30956 | 1 | TRUE | 1208 |
| gender | 30956 | 1 | 1 | 1208 |
| hetero_relationship | 30956 | 1 | 1 | 1208 |
| weekend | 30956 | 2 | TRUE | 1122 |
| time_since_last_response | 23275 | 2 | [10,31] | 1097 |
| menstruated_at_all | 30956 | 2 | TRUE | 910 |
| in_pair_public_intimacy | 29902 | 3 | 0 | 1081 |
| mate_retention_1 | 29902 | 3 | 2 | 960 |
| spent_night_with_partner | 29902 | 3 | 0 | 960 |
| fertile_broad_backward_inferred | 17013 | 3 | 0.0533333333333333 | 959 |
| had_sexual_intercourse | 29902 | 3 | 1 | 945 |
| premenstrual_phase_backward_inferred | 27143 | 3 | TRUE | 929 |
| premenstrual_phase_fab | 27143 | 3 | TRUE | 929 |
| fertile_broad_forward_counted | 16201 | 3 | 0.0533333333333333 | 928 |
| fertile_window_forward_counted | 16201 | 3 | broad | 919 |
| fertile_window_backward_inferred | 17013 | 3 | broad | 915 |
| menstruation_1 | 29901 | 3 | 1 | 910 |
| fertile_narrow_forward_counted | 12407 | 3 | 0.51 | 908 |
| fertile_narrow_backward_inferred | 13345 | 3 | 0.51 | 905 |
| had_petting | 29901 | 3 | 1 | 904 |
| pill_contraception | 30956 | 2 | 0 | 598 |
| premenstrual_phase_forward_counted | 27380 | 3 | TRUE | 873 |
| contraception_meeting_partner | 30956 | 2 | 2 | 581 |
| pill_in_last_3_months | 30956 | 2 | 0 | 566 |
| pille_control | 30956 | 2 | 2 | 566 |
| menstruation | 30956 | 3 | pre | 819 |
| premenstrual_phase | 17887 | 3 | TRUE | 819 |
| premenstrual_phase_squished | 17944 | 3 | TRUE | 807 |
| sufficient_diary_coverage | 30956 | 2 | FALSE | 518 |
| partner_initiated_sexual_intercourse | 6512 | 3 | 0 | 772 |
| sexual_intercourse_3 | 6512 | 3 | 1 | 772 |
| hormonal_contraception | 30956 | 2 | 0 | 514 |
| menstrual_onset | 18614 | 3 | TRUE | 726 |
| fertile_broad_squished | 11877 | 3 | 0.44 | 697 |
| fertile_broad | 11682 | 3 | 0.44 | 694 |
| fertile_window_squished | 11877 | 3 | broad | 683 |
| fertile_window | 11682 | 3 | broad | 675 |
| fertile_narrow | 9648 | 3 | 0.51 | 670 |
| fertile_narrow_squished | 9702 | 3 | 0.51 | 667 |
| ended.abschluss | 30956 | 2 | FALSE | 439 |
| hormonal_contra | 30952 | 3 | TRUE | 602 |
| hormonal_all | 30952 | 3 | FALSE | 585 |
| mate_retention_2 | 29902 | 4 | 1 | 758 |
| dodgy_data | 30924 | 3 | TRUE | 566 |
| any_RCD | 30956 | 2 | FALSE | 368 |
| SJS_5 | 30956 | 2 | 2 | 357 |
| SJS_5R | 30956 | 2 | 1 | 357 |
| hormonal | 30952 | 3 | FALSE | 514 |
| SJS_6 | 30956 | 2 | 2 | 312 |
| ever_menstruated | 30956 | 2 | FALSE | 298 |
| had_any_period | 30956 | 2 | FALSE | 298 |
| sexual_intercourse_2 | 29902 | 4 | 2 | 595 |
| SJS_4 | 30956 | 2 | 2 | 286 |
| sexual_intercourse_5 | 29901 | 4 | 2 | 567 |
| include_all | 30952 | 3 | TRUE | 421 |
| included | 28493 | 3 | cycling | 421 |
| included_all | 28493 | 3 | cycling | 421 |
| antibiotics | 30956 | 2 | 1 | 279 |
| hormonal_medication_in_last_3_months | 30956 | 2 | 1 | 279 |
| weekday | 30924 | 8 | Saturday | 1034 |
| hormonal_lax | 30952 | 3 | TRUE | 374 |
| week_number | 30924 | 7 | (28,35] | 833 |
| stress | 26606 | 3 | 1 | 355 |
| days_with_partner | 30956 | 3 | 3-5 days | 346 |
| cohabitation | 30956 | 3 | Live in same city | 334 |
| long_distance_relationship | 30956 | 3 | 2 | 334 |
| sexual_intercourse_1 | 29902 | 6 | 1 | 663 |
| sexual_intercourse_1_6scale | 29902 | 6 | 1.2 | 663 |
| medicament_1 | 26606 | 3 | 1 | 330 |
| nights_with_partner | 30956 | 3 | 3-5 nights | 329 |
| attention_2 | 29874 | 7 | 6 | 766 |
| SJS_3 | 30956 | 2 | 2 | 213 |
| SJS_3R | 30956 | 2 | 1 | 213 |
| male_attention_1 | 29874 | 7 | 2 | 739 |
| situation_of_living | 18107 | 3 | 1 | 294 |
| hormonal_conservative | 30940 | 3 | TRUE | 293 |
| menstruation_3 | 4794 | 4 | 3 | 390 |
| menstruation_strength | 30956 | 4 | 3 | 390 |
| days_since_menstrual_onset | 4068 | 6 | 4 | 571 |
| SJS_1 | 30956 | 2 | 2 | 188 |
| illness_1 | 26606 | 3 | 1 | 271 |
| children_broad_categories | 30956 | 2 | children | 180 |
| mate_retention_3 | 29876 | 7 | 6 | 594 |
| desirability_1 | 29881 | 7 | 1 | 562 |
| choice_of_clothing_3 | 29887 | 7 | 1 | 554 |
| communication_partner_1 | 29902 | 4 | 1 | 311 |
| choice_of_clothing_6 | 29883 | 7 | 6 | 541 |
| desirability_partner | 29879 | 7 | 1 | 537 |
| time_of_response | 30924 | 11 | [17.0,17.3) | 835 |
| any_fertile_days_known | 30956 | 2 | FALSE | 147 |
| attention_1 | 29874 | 7 | 2 | 510 |
| jealousy_1 | 29877 | 7 | 6 | 463 |
| extra_pair_3 | 29873 | 7 | 6 | 451 |
| cycle_regularity | 30956 | 3 | irregular, more than 5 days off | 192 |
| cyklus_5 | 30956 | 3 | 3 | 192 |
| extra_pair_6 | 29873 | 7 | 2 | 445 |
| male_mate_retention_1 | 29876 | 7 | 6 | 444 |
| choice_of_clothing_1 | 29896 | 7 | 6 | 431 |
| mate_retention_4 | 29876 | 7 | 2 | 427 |
| male_mate_retention_2 | 29876 | 7 | 2 | 418 |
| mate_retention_6 | 29875 | 7 | 6 | 417 |
| flat_share | 10737 | 3 | 1 | 173 |
| extra_pair_12 | 29873 | 7 | 6 | 389 |
| hormonal_strict | 30940 | 3 | TRUE | 161 |
| SJS_2 | 30956 | 2 | 2 | 107 |
| SJS_2R | 30956 | 2 | 1 | 107 |
| menstruation_2 | 4794 | 7 | 6 | 368 |
| extra_pair_5 | 29873 | 7 | 2 | 348 |
| choice_of_clothing_4 | 29884 | 7 | 6 | 343 |
| time_for_response | 29143 | 11 | [0.10, 2.15) | 531 |
| extra_pair_2 | 29874 | 7 | 6 | 336 |
| we_know_fertile_days | 30956 | 2 | FALSE | 96 |
| include_lax | 30952 | 3 | TRUE | 143 |
| included_lax | 17960 | 3 | cycling | 143 |
| self_esteem_1 | 29881 | 7 | 1 | 309 |
| BFI_consc_9 | 30956 | 5 | 3 | 219 |
| BFI_consc_9R | 30956 | 5 | 3 | 219 |
| living_situation | 30956 | 4 | living in all-female flatshare | 173 |
| choice_of_clothing_8 | 29882 | 7 | 6 | 291 |
| showy_clothes | 29882 | 7 | 6 | 291 |
| extra_pair_1 | 29867 | 3 | 1 | 124 |
| extra_pair_intimacy | 29867 | 3 | 1 | 124 |
| NARQ_admiration_3 | 29877 | 7 | 6 | 289 |
| estrogen_categories | 26785 | 4 | (300,600] | 153 |
| mate_retention_5 | 29876 | 7 | 6 | 265 |
| include_conservative | 30952 | 3 | TRUE | 112 |
| included_conservative | 14147 | 3 | cycling | 112 |
| SOI_R_6 | 30956 | 5 | 2 | 183 |
| SOI_R_6R | 30956 | 5 | 4 | 183 |
| prc_stirn_b_forward_counted | 27380 | 20 | 0.14 | 687 |
| fertile_fab | 27143 | 20 | 0.05 | 679 |
| fertile_forward_and_backward | 27143 | 20 | 0.05 | 679 |
| prc_stirn_b_backward_inferred | 27143 | 20 | 0.05 | 679 |
| SOI_R_5 | 30956 | 5 | 3 | 169 |
| extra_pair_13 | 29873 | 7 | 6 | 234 |
| extra_pair_sexual_fantasies | 29873 | 7 | 6 | 234 |
| choice_of_clothing_2 | 29892 | 7 | 1 | 233 |
| MV_2 | 30956 | 5 | 1 | 162 |
| attention | 29874 | 12 | 1.5 | 382 |
| choice_of_clothing_7 | 29881 | 7 | 6 | 222 |
| fertile | 27828 | 20 | 0.16 | 619 |
| attractiveness_finance_2 | 30956 | 5 | 5 | 152 |
| income_partner | 30956 | 5 | > 3000€ | 152 |
| NARQ_admiration_2 | 29877 | 7 | 6 | 211 |
| SOI_R_4 | 30956 | 5 | 1 | 148 |
| NARQ_admiration_1 | 29877 | 7 | 6 | 197 |
| BFI_open_10 | 30956 | 5 | 1 | 136 |
| choice_of_clothing_5 | 29885 | 7 | 6 | 186 |
| extra_pair_7 | 29873 | 7 | 6 | 186 |
| had_sex_with_partner_yet | 30956 | 2 | 0 | 53 |
| male_mate_retention | 29876 | 12 | 6 | 314 |
| relationship_satisfaction_1 | 29902 | 8 | 4.5 | 208 |
| extra_pair_9 | 29873 | 7 | 6 | 179 |
| ended.nachbe_other_hormonal | 30956 | 2 | TRUE | 51 |
| extra_pair_10 | 29873 | 7 | 6 | 178 |
| BFI_agree_6 | 30956 | 5 | 1 | 126 |
| BFI_agree_6R | 30956 | 5 | 5 | 126 |
| BFI_extra_7 | 30956 | 5 | 5 | 126 |
| BFI_extra_7R | 30956 | 5 | 1 | 126 |
| trying_to_get_pregnant | 30956 | 2 | 1 | 50 |
| prc_wcx_b_forward_counted | 27380 | 26 | 0.006 | 624 |
| CJS_6 | 30956 | 5 | 5 | 119 |
| prc_wcx_b_backward_inferred | 27143 | 26 | 0.001 | 603 |
| BFI_agree_4 | 30956 | 5 | 1 | 113 |
| NARQ_1 | 30956 | 5 | 5 | 112 |
| BFI_consc_4 | 30956 | 5 | 1 | 111 |
| BFI_consc_4R | 30956 | 5 | 5 | 111 |
| included_levels | 28493 | 5 | lax | 111 |
| SGSE_5 | 30956 | 5 | 5 | 111 |
| BFI_neuro_7 | 30956 | 5 | 5 | 108 |
| RJS_1 | 30956 | 5 | 5 | 108 |
| CJS_3 | 30956 | 5 | 1 | 106 |
| MV_P_2 | 30956 | 5 | 1 | 105 |
| SGSE_4 | 30956 | 5 | 5 | 104 |
| SGSE_4R | 30956 | 5 | 1 | 104 |
| extra_pair_going_out | 29873 | 12 | 5.5 | 242 |
| male_jealousy_1 | 29876 | 7 | 6 | 141 |
| male_jealousy_2 | 29877 | 7 | 6 | 138 |
| BFI_neuro_8 | 30956 | 5 | 1 | 98 |
| NARQ_6 | 30956 | 5 | 5 | 98 |
| SOI_R_3 | 30956 | 5 | 5 | 97 |
| cyklus_4 | 30956 | 5 | 1 | 95 |
| include_strict | 30952 | 3 | TRUE | 57 |
| included_bool | 7682 | 3 | TRUE | 57 |
| included_strict | 7682 | 3 | cycling | 57 |
| extra_pair_1b | 399 | 3 | 1 | 54 |
| extra_pair_sex | 30924 | 3 | 1 | 54 |
| fertile_cont | 17887 | 20 | 0.05 | 360 |
| prc_stirn_b | 17887 | 20 | 0.05 | 360 |
| extra_pair_8 | 29873 | 7 | 6 | 125 |
| RJS_3 | 30956 | 5 | 5 | 89 |
| BFI_neuro_6 | 30956 | 5 | 1 | 87 |
| BFI_neuro_6R | 30956 | 5 | 5 | 87 |
| relationship_status_clean | 30956 | 3 | Verlobt | 52 |
| BFI_agree_1 | 30956 | 5 | 5 | 86 |
| BFI_agree_1R | 30956 | 5 | 1 | 86 |
| BFI_consc_8 | 30956 | 5 | 1 | 85 |
| BFI_consc_8R | 30956 | 5 | 5 | 85 |
| BFI_neuro_5 | 30956 | 5 | 5 | 85 |
| BFI_neuro_5R | 30956 | 5 | 1 | 85 |
| BFI_extra_2 | 30956 | 5 | 1 | 84 |
| BFI_extra_2R | 30956 | 5 | 5 | 84 |
| MV_P_5 | 30956 | 5 | 1 | 84 |
| MV_P_5R | 30956 | 5 | 5 | 84 |
| extra_pair_4 | 29873 | 7 | 6 | 116 |
| BFI_agree_8 | 30956 | 5 | 1 | 82 |
| BFI_agree_8R | 30956 | 5 | 5 | 82 |
| BFI_neuro_3 | 30956 | 5 | 1 | 81 |
| SGSE_1 | 30956 | 5 | 5 | 81 |
| ended | 30956 | 2 | FALSE | 32 |
| attractiveness_stp_self | 30956 | 5 | 1 | 79 |
| CJS_2 | 30956 | 5 | 5 | 79 |
| attractiveness_finance_1 | 30956 | 5 | 5 | 78 |
| extra_pair_compliments | 29873 | 12 | 6 | 187 |
| male_jealousy_3 | 29876 | 7 | 6 | 109 |
| SGSE_3 | 30956 | 5 | 5 | 76 |
| SGSE_3R | 30956 | 5 | 1 | 76 |
| attractiveness_stp | 30956 | 5 | 1 | 75 |
| NARQ_7 | 30956 | 5 | 1 | 75 |
| extra_pair_11 | 29873 | 7 | 6 | 102 |
| NARQ_rivalry_1 | 29876 | 7 | 6 | 100 |
| MV_5 | 30956 | 5 | 1 | 71 |
| MV_5R | 30956 | 5 | 5 | 71 |
| NARQ_3 | 30956 | 5 | 5 | 71 |
| SOI_R_7 | 30956 | 5 | 5 | 70 |
| NARQ_rivalry_2 | 29877 | 7 | 6 | 97 |
| BFI_neuro_2 | 30956 | 5 | 1 | 69 |
| BFI_neuro_2R | 30956 | 5 | 5 | 69 |
| prc_stirn_b_squished | 17944 | 20 | 0.16 | 271 |
| has_not_had_sex_yet | 30956 | 2 | 1 | 27 |
| BFI_neuro_1 | 30956 | 5 | 5 | 67 |
| cyklus_2 | 30956 | 5 | 2 | 66 |
| pregnant | 30956 | 2 | 1 | 26 |
| pregnant_in_last_3_months | 30956 | 2 | yes | 26 |
| NARQ_15 | 30956 | 5 | 5 | 64 |
| day_number | 28729 | 42 | 35 | 534 |
| NARQ_16 | 30956 | 5 | 5 | 62 |
| cycle_length_groups | 30956 | 4 | (35,41] | 49 |
| BFI_extra_8 | 30956 | 5 | 1 | 61 |
| contraceptive_methods | 30357 | 5 | fertility_awareness | 61 |
| SOI_R_2 | 30956 | 5 | 5 | 60 |
| RJS_5 | 30956 | 5 | 5 | 59 |
| BFI_open_7 | 30956 | 5 | 1 | 58 |
| BFI_open_7R | 30956 | 5 | 5 | 58 |
| RJS_6 | 30956 | 5 | 5 | 58 |
| BFI_open_5 | 30956 | 5 | 1 | 56 |
| ZIP_7 | 30956 | 5 | 1 | 56 |
| ZIP_7R | 30956 | 5 | 5 | 56 |
| NARQ_11 | 30956 | 5 | 1 | 55 |
| want_more_info | 26606 | 3 | 0 | 33 |
| prc_wcx_b | 17887 | 26 | 0 | 277 |
| prc_wcx_b_squished | 17944 | 26 | 0.018 | 271 |
| change_contraception_1 | 26606 | 3 | 1 | 31 |
| BFI_open_9 | 30956 | 5 | 1 | 51 |
| BFI_open_9R | 30956 | 5 | 5 | 51 |
| CJS_1 | 30956 | 5 | 5 | 51 |
| attractiveness_relativ_1 | 30956 | 4 | 4 | 40 |
| breast_feeding | 30956 | 2 | 1 | 20 |
| breast_feeding_in_last_3_months | 30956 | 2 | 1 | 20 |
| SGSE_2 | 30956 | 5 | 5 | 49 |
| BFI_open_1 | 30956 | 5 | 1 | 47 |
| age_group | 30956 | 6 | (45,70] | 56 |
| BFI_extra_6 | 30956 | 5 | 1 | 45 |
| BFI_open_6 | 30956 | 5 | 1 | 44 |
| biggest_diff | 30956 | 10 | [ 2.04, 2.13) | 88 |
| NARQ_18 | 30956 | 5 | 1 | 44 |
| NARQ_9 | 30956 | 5 | 5 | 44 |
| BFI_consc_2 | 30956 | 5 | 1 | 43 |
| BFI_consc_2R | 30956 | 5 | 5 | 43 |
| NARQ_5 | 30956 | 5 | 1 | 43 |
| MV_P_1 | 30956 | 5 | 5 | 42 |
| attractiveness_finance_self | 13229 | 6 | 5 | 49 |
| income | 13229 | 6 | > 3000€ | 49 |
| MV_4 | 30956 | 5 | 1 | 40 |
| avg_diff | 30885 | 11 | [1.055, 1.09) | 84 |
| BFI_extra_5 | 30956 | 5 | 1 | 38 |
| BFI_extra_5R | 30956 | 5 | 5 | 38 |
| sexy_clothes | 30924 | 18 | 6 | 133 |
| breakup | 26606 | 3 | 1 | 22 |
| communication_partner_2 | 28857 | 8 | 6 | 58 |
| BFI_extra_4 | 30956 | 5 | 1 | 36 |
| BFI_extra_3 | 30956 | 5 | 1 | 34 |
| BFI_neuro_4 | 30956 | 5 | 1 | 34 |
| NARQ_rivalry_3 | 29876 | 7 | 6 | 46 |
| BFI_agree_5 | 30956 | 5 | 1 | 32 |
| SOI_R_8 | 30956 | 5 | 5 | 32 |
| religiosity | 30956 | 5 | 5 | 31 |
| RJS_4 | 30956 | 5 | 5 | 31 |
| days_responded_percentage | 30885 | 11 | [0.95122,0.976) | 67 |
| CJS_5 | 30956 | 5 | 5 | 30 |
| NARQ_8 | 30956 | 5 | 5 | 30 |
| RJS_2 | 30956 | 5 | 5 | 30 |
| BFI_consc_5 | 30956 | 5 | 1 | 29 |
| certainty_menstruation | 30956 | 5 | 1 | 29 |
| cyklus_6 | 30956 | 5 | 2 | 29 |
| SOI_R_9 | 30956 | 5 | 5 | 29 |
| attractiveness_occupation | 30956 | 5 | 1 | 28 |
| contraceptive_method | 30956 | 5 | other | 27 |
| MV_P_4 | 30956 | 5 | 1 | 27 |
| NARQ_admiration | 29877 | 17 | 6 | 88 |
| SOI_uv | 30956 | 13 | 1.33333333333333 | 67 |
| SJS | 30956 | 7 | 2 | 36 |
| BFI_open_8 | 30956 | 5 | 1 | 25 |
| sexual_intercourse_satisfaction | 6511 | 8 | 1 | 38 |
| BFI_consc_6 | 30956 | 5 | 1 | 23 |
| contraceptive_method_by_pearl | 30956 | 5 | fertility_awareness | 23 |
| attractiveness_finance | 30956 | 9 | 5 | 41 |
| CJS_4 | 30956 | 5 | 5 | 22 |
| MV_1 | 30956 | 5 | 5 | 22 |
| BFI_extra_1 | 30956 | 5 | 1 | 21 |
| NARQ_4 | 30956 | 5 | 5 | 21 |
| BFI_agree_9 | 30956 | 5 | 1 | 20 |
| BFI_open_4 | 30956 | 5 | 1 | 20 |
| NARQ_10 | 30956 | 5 | 5 | 19 |
| ZIP_5 | 30956 | 5 | 1 | 19 |
| BFI_agree_3 | 30956 | 5 | 1 | 18 |
| BFI_agree_3R | 30956 | 5 | 5 | 18 |
| BFI_consc_7 | 30956 | 5 | 1 | 18 |
| no_variation | 29891 | 3 | TRUE | 10 |
| BFI_consc_1 | 30956 | 5 | 1 | 16 |
| MV_3 | 30956 | 5 | 1 | 15 |
| NARQ_12 | 30956 | 5 | 5 | 15 |
| NARQ_13 | 30956 | 5 | 5 | 15 |
| ZIP_2 | 30956 | 5 | 1 | 15 |
| ZIP_4 | 30956 | 5 | 1 | 14 |
| ZIP_4R | 30956 | 5 | 5 | 14 |
| new_relationship | 26606 | 3 | 1 | 8 |
| BFI_agree_2 | 30956 | 5 | 1 | 13 |
| MV_P_3 | 30956 | 5 | 1 | 13 |
| satisfaction_sexual_intercourse | 29562 | 6 | 1 | 15 |
| days_with_partner_per_month | 9538 | 6 | > 14 days | 14 |
| duration_pill | 16212 | 6 | 1 | 14 |
| long_distance_relationship_2 | 9538 | 6 | 5 | 14 |
| BFI_consc_3 | 30956 | 5 | 1 | 11 |
| ZIP_3 | 30956 | 5 | 1 | 11 |
| FCD | 27288 | 41 | 40 | 89 |
| menstrual_onset_days_since | 27380 | 42 | 39 | 89 |
| BFI_agree_7 | 30956 | 5 | 1 | 10 |
| occupation_clean | 30850 | 7 | Nicht berufstätig | 13 |
| NARQ_2 | 30956 | 5 | 5 | 9 |
| SOI_R_1 | 30956 | 5 | 5 | 9 |
| sexy | 30956 | 5 | 1 | 8 |
| attractiveness_face | 30956 | 5 | 1 | 7 |
| attractiveness_overall | 30956 | 5 | 1 | 7 |
| BFI_open_2 | 30956 | 5 | 1 | 7 |
| BFI_open_3 | 30956 | 5 | 1 | 7 |
| NARQ_17 | 30956 | 5 | 5 | 7 |
| ZIP_1 | 30956 | 5 | 1 | 7 |
| ZIP_6 | 30956 | 5 | 1 | 7 |
| partner_mate_retention | 29874 | 22 | 6 | 30 |
| number_of_cycles | 30924 | 6 | 5 | 8 |
| RCD_squished | 18018 | 31 | 0 | 41 |
| attractiveness_ltp | 30956 | 5 | 1 | 6 |
| partner_attractiveness_longterm | 30956 | 5 | -3.97818492731748 | 6 |
| RCD_rel_to_ovulation | 17731 | 37 | -20 | 44 |
| cycle_nr | 30924 | 6 | 5 | 7 |
| attractiveness_ltp_self | 30956 | 5 | 1 | 5 |
| contraceptives_broad_categories | 30952 | 11 | partner_sterilised | 11 |
| distance_partner | 9538 | 8 | 6 | 8 |
| distance_to_partner_hours | 9538 | 8 | 9-12h | 8 |
| partner_attractiveness_money | 30956 | 13 | 1 | 12 |
| attractiveness_body | 30956 | 5 | 1 | 4 |
| SOI_im | 30956 | 13 | 5 | 9 |
| religion_clean | 30881 | 6 | Judentum | 4 |
| MV_short | 30956 | 13 | 1 | 8 |
| NARQ_14 | 30956 | 5 | 5 | 3 |
| menstrual_onset_days_until | 17887 | 42 | -38 | 24 |
| RCD | 17887 | 42 | -38 | 24 |
| gestagen_ug | 2 | 2 | 250 | 1 |
| male_jealousy | 29876 | 17 | 5.33333333333333 | 8 |
| NARQ_rivalry | 29876 | 17 | 5.66666666666667 | 8 |
| method_meeting_clean | 16222 | 7 | Depotspritze | 3 |
| extra_pair_flirting | 29873 | 17 | 5.66666666666667 | 7 |
| MV | 30956 | 17 | 1.8 | 7 |
| SOI_be | 30956 | 13 | 4.66666666666667 | 5 |
| SGSE | 30956 | 21 | 4.8 | 8 |
| contraceptives_categories_natural | 29911 | 6 | infertile | 2 |
| estrogen | 195 | 3 | 30 | 1 |
| estrogen_ug | 1497 | 6 | 15 | 2 |
| estrogen_ug_merged | 2157 | 6 | 15 | 2 |
| female_jealousy | 30924 | 18 | 5.66666666666667 | 6 |
| gestagen | 195 | 3 | 13 | 1 |
| hormonal_1 | 1716 | 6 | 4 | 2 |
| RCD_inferred | 28104 | 72 | -27 | 22 |
| RCD_for_merge | 18097 | 51 | 49 | 14 |
| extra_pair_desire | 29873 | 27 | 5.8 | 7 |
| estrogen_ug_other | 1315 | 4 | 420 | 1 |
| partner_attractiveness_shortterm | 30956 | 16 | -3.34512824374663 | 4 |
| MV_P_short | 30956 | 13 | 1 | 3 |
| partner_attractiveness_physical | 30956 | 9 | 2 | 2 |
| minimum_cycle_length_diary | 23767 | 41 | 40 | 8 |
| RJS | 30956 | 25 | 4.83333333333333 | 4 |
| hormonal_2 | 1716 | 9 | circlet | 1 |
| gestagens | 1497 | 10 | NGT | 1 |
| gestagens_ug | 1497 | 10 | 125 | 1 |
| estrogen_ug_all | 26785 | 11 | 360 | 1 |
| estrogen_ug_cycle | 17120 | 12 | 360 | 1 |
| gestagen_type | 16178 | 12 | NES | 1 |
| gestagen_ug_merged | 15610 | 12 | 1500 | 1 |
| menarche | 13225 | 12 | 19 | 1 |
| in_pair_desire | 29874 | 56 | 4.4 | 4 |
| occupation | 30956 | 14 | Beamtin | 1 |
| BFI_neuro | 30956 | 32 | 1.125 | 2 |
| gestagen_cycle | 14567 | 16 | 1.44 | 1 |
| intensive_sports | 26606 | 16 | 14 | 1 |
| SOI_R | 30956 | 35 | 4.22222222222222 | 2 |
| MV_P | 30956 | 18 | 4.8 | 1 |
| cigarettes | 26606 | 19 | 11 | 1 |
| choice_of_clothing | 29881 | 41 | 5.5 | 2 |
| cycle_length | 30956 | 21 | 39 | 1 |
| cyklus_3 | 30956 | 21 | 39 | 1 |
| day_count | 30956 | 42 | 42 | 2 |
| days | 30956 | 42 | 25 | 2 |
| fertile_days_known_backward | 30956 | 42 | 41 | 2 |
| first_time | 29943 | 21 | 10 | 1 |
| MV_diff | 30956 | 24 | -3.66666666666667 | 1 |
| CJS | 30956 | 25 | 5 | 1 |
| ZIP | 30956 | 27 | 1.28571428571429 | 1 |
| number_sexual_partner | 30232 | 28 | 26 | 1 |
| partner_attractiveness_global | 30956 | 29 | -2.95278253338198 | 1 |
| female_mate_retention | 30924 | 30 | 3.33333333333333 | 1 |
| BFI_extra | 30956 | 31 | 1.125 | 1 |
| BFI_agree | 30956 | 32 | 1.33333333333333 | 1 |
| NARQ_K_total | 29876 | 32 | 5.83333333333333 | 1 |
| BFI_open | 30956 | 33 | 1.5 | 1 |
| BFI_consc | 30956 | 34 | 1.22222222222222 | 1 |
| last_lag | 25783 | 42 | 27 | 1 |
| timespan | 18527 | 42 | 26 | 1 |
| took_days | 18602 | 42 | 26 | 1 |
| fertile_days_known_backward_inferred | 30956 | 43 | 42 | 1 |
| fertile_days_known_forward | 30956 | 43 | 42 | 1 |
| n_days | 30956 | 43 | 42 | 1 |
| height | 30956 | 46 | 116 | 1 |
| NARQ | 30956 | 56 | 1 | 1 |
| extra_pair | 29873 | 62 | 5.75 | 1 |
| partner_attractiveness_rel_to_self | 30956 | 65 | -0.162454954632523 | 1 |
| weight_post | 26606 | 68 | 102 | 1 |
| cycle_length_diary | 22767 | 69 | 10 | 1 |
| mean_cycle_length_diary | 22767 | 69 | 10 | 1 |
| median_cycle_length_diary | 22767 | 69 | 10 | 1 |
| contraception | 30952 | 73 | coitus_interruptus, temperature_billings | 1 |
| weight | 30956 | 79 | 101 | 1 |
| duration_relationship_total | 30820 | 85 | 11.1 | 1 |
| attractiveness_income_age_corrected | 30956 | 148 | -0.049531448419045 | 1 |
| BMI | 26606 | 568 | 10.8108108108108 | 1 |
| fertile_mean | 30956 | 888 | 0.0111111111111111 | 1 |
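Ideally, no participant would be uniquely identifiable from the data, but this is not realistic. Instead, I look for rare combinations of demographic variables, coarsen those variables, and set them to missing wherever a combination is still too rare.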
# Coarsen a numeric variable into bins and return, for every observation, the
# mean of the bin it falls into.
#
# x:     numeric vector to coarsen.
# group: optional identifier aligned with x. When given, the bin boundaries are
#        derived from the distinct (x, group) pairs, so each group contributes
#        a given value only once.
# m:     forwarded to Hmisc::cut2() as `m` (its target minimum bin size).
#
# Returns a numeric vector as long as x.
cut_to_number = function(x, group = NULL, m = 40) {
  basis = data.frame(x = x)
  if (!is.null(group)) {
    pairs = data.frame(x = x, group = group)
    basis = pairs[!duplicated(pairs), , drop = FALSE]
  }
  # Boundaries come from the de-duplicated values ...
  breaks = Hmisc::cut2(basis$x, m = m, onlycuts = TRUE)
  # ... but are applied to every element of x; the levels are the bin means.
  binned = Hmisc::cut2(x, cuts = breaks, levels.mean = TRUE)
  # Factor levels are text, so go through character to recover the numbers.
  as.numeric(as.character(binned))
}
# Count how often each combination of the demographic variables occurs in the
# diary and keep the combinations that occur fewer than 7 times.
# NOTE(review): n() counts diary rows, not participants - confirm that rows
# are the intended unit for the "< 7" threshold.
unique_combos = group_by(diary, age_group, height, weight, religion_clean, duration_relationship_total)
unique_combos = ungroup(summarise(unique_combos, n = n()))
unique_combos = filter(arrange(unique_combos, desc(n)), n < 7)
# Number of rare combinations found.
nrow(unique_combos)
## [1] 198
# Coarsen the demographic variables before re-checking for rare combinations:
# small religion categories (and missing values) are merged into "other", and
# the numeric variables are replaced by bin means with the bin boundaries
# computed on person-level values (see cut_to_number()).
#
# NOTE(review): the setup chunk groups `diary` by `person` and never ungroups.
# If that grouping were still active here, mutate() would call cut_to_number()
# separately for every participant and the person-level binning could not
# work, so any grouping is dropped explicitly. This is a no-op when the data
# are already ungrouped.
diary = diary %>%
  ungroup() %>%
  mutate(
    religion_clean = recode(religion_clean, "Judentum" = "other", "Islam" = "other", "Buddhismus" = "other", .missing = "other"),
    weight = cut_to_number(weight, person, m = 50),
    height = cut_to_number(height, person, m = 50),
    first_time = cut_to_number(first_time, person),
    duration_relationship_total = cut_to_number(duration_relationship_total, person, m = 50)
  )
# Recount the rare combinations of the demographic variables: combinations
# that occur fewer than 7 times in the diary are kept.
# NOTE(review): n() counts diary rows, not participants - confirm that rows
# are the intended unit for the "< 7" threshold.
unique_combos = group_by(diary, age_group, height, weight, religion_clean, duration_relationship_total)
unique_combos = ungroup(summarise(unique_combos, n = n()))
unique_combos = filter(arrange(unique_combos, desc(n)), n < 7)
# Number of rare combinations found.
nrow(unique_combos)
## [1] 186
# unique_combos %>%
# pander()
# Flag every diary row whose combination of demographic variables is listed in
# `unique_combos`, then set those variables to missing on the flagged rows.
diary = diary %>%
  left_join(
    unique_combos %>% select(-n) %>% mutate(too_unique = TRUE),
    # Explicit keys: the join no longer depends on whichever column names the
    # two tables happen to share.
    by = c("age_group", "height", "weight", "religion_clean", "duration_relationship_total")
  )
diary = diary %>% mutate(
  # Rows without a match get NA from the join; NA %in% TRUE is FALSE, so they
  # count as not flagged.
  too_unique = too_unique %in% TRUE,
  # replace() keeps each column's own type. The previous
  # if_else(..., age_group, NA_integer_) relied on if_else() accepting an
  # integer NA for what appears to be a factor, which type-strict dplyr
  # versions reject.
  age_group = replace(age_group, too_unique, NA),
  height = replace(height, too_unique, NA),
  weight = replace(weight, too_unique, NA),
  religion_clean = replace(religion_clean, too_unique, NA),
  duration_relationship_total = replace(duration_relationship_total, too_unique, NA)
)
# Restricted release: the diary without the SOI* and extra_pair* columns and
# without the five individually listed columns.
diary_noep = select(
  diary,
  -starts_with("SOI"),
  -starts_with("extra_pair"),
  -number_sexual_partner,
  -trying_to_get_pregnant,
  -has_not_had_sex_yet,
  -had_sex_with_partner_yet,
  -first_time
)
save(diary_noep, file = "diary_restricted_anonymised.rdata")
# NOTE(review): the rendered output shows this call failing with
# "missing value where TRUE/FALSE needed"; the cause is not visible from this
# script, so check whether the .sav file is actually produced.
haven::write_sav(diary_noep, path = "diary_restricted_anonymised.sav")
## Error in if (any(bad_lengths)) {: missing value where TRUE/FALSE needed
# Full anonymised release: write `diary` as an R data file and as an SPSS file.
save(list = "diary", file = "diary_anonymised.rdata")
# NOTE(review): the rendered output shows this call failing with
# "missing value where TRUE/FALSE needed" - check whether the .sav file is
# actually produced.
haven::write_sav(data = diary, path = "diary_anonymised.sav")
## Error in if (any(bad_lengths)) {: missing value where TRUE/FALSE needed