Simulation functions

Load packages

library(plyr); library(data.table); library(ggplot2); library(psych); library(broom)

Simulate data according to certain parameters

simulate_fertility_effect = function(
    ### settings
  nr_of_people = 70,
  nr_days = 35,
  dayspan = 1:38,
  miss_window = 4,
  effects = list(
    trait = 0.5,
    fertility_effect = 0,
    noise = 0.5
  )
) {

    #### simulate people
  people = data.table(
      person = 1:nr_of_people,
      irrelevant_cov = rnorm(nr_of_people),
      trait = rnorm(nr_of_people) # a stable component of the outcome
    )

  ### cycle day reference
  days = data.table(
    rcd_day = c(29:1, 30:38),
    FCD = c(1:38),
    prc_stirn_b = c(.01, .01, .02, .03, .05, .09, .16, .27, .38, .48, .56, .58, .55, .48, .38, .28, .20, .14, .10, .07, .06, .04, .03, .02, .01, .01, .01, .01, .01, rep(.01, times = 9)),
    #                   rep(.01, times = 70)), # gangestad uses .01 here, but I think such cases are better thrown than kept, since we might simply have missed a mens
    prc_wcx_b = c(.000, .000, .001, .002, .004, .009, .018, .032, .050, .069, .085, .094, .093, .085, .073, .059, .047, .036, .028, .021, .016, .013, .010, .008, .007, .006, .005, .005, .005, rep(.005, times = 9))
  )

  days[, fertile_narrow := NA_real_] # most days we won't consider at all, because they are of uncertain fertility status
  days[ between(rcd_day,15,19) , fertile_narrow := mean(prc_stirn_b, na.rm = T)] # these days are likely fertile
  days[ between(rcd_day,4,12) , fertile_narrow := mean(prc_stirn_b, na.rm = T)] # these days are likely infertile

  days[, fertile_broad := NA_real_] # most days we won't consider at all, because they are of uncertain fertility status
  days[ between(rcd_day,14,22), fertile_broad := mean(prc_stirn_b, na.rm = T)] # these days are likely fertile
  days[ between(rcd_day,4,12) , fertile_broad := mean(prc_stirn_b, na.rm = T)] # these days are likely infertile

  #### simulate person days

  i_length = sum(nr_days * nr_of_people)
  person_days = data.table(row = 1:i_length, person = integer(i_length), days_taken_part = rep(nr_days, each = nr_days * nr_of_people))
  setkey(person_days, row) # somehow gets reordered..

  days_per_person = rep(nr_days, times = nr_of_people)

  person_days$person = rep(people$person, each = nr_days)
  which_days = function(day_nr) sample(dayspan, size = day_nr, replace = F)
  person_days$rcd_day = unlist(lapply(days_per_person, FUN = which_days))

  person_days = merge(person_days, people, by = "person")
  person_days = merge(person_days, days, by = "rcd_day", all.x = T)
  rcd_day_real = person_days$rcd_day
  person_days[, rcd_day := rcd_day_real  + rep(round(miss_window * rnorm(n = nr_days)), each = nr_of_people)]
  person_days[ rcd_day < 1, rcd_day := 38L + rcd_day]
  person_days[ rcd_day > 38L, rcd_day := rcd_day - 38L]
  person_days = merge(person_days, days, by = "rcd_day", all.x = T, suffixes = c("","_m"))
  person_days[, rcd_day_m := rcd_day] # swap in
  person_days[, rcd_day := rcd_day_real] # swap back

  setkey(person_days, row) # somehow gets reordered..

  person_days[, fertile := prc_stirn_b_m]
  person_days[, state :=
                    effects$trait * trait + # trait
                    effects$fertility_effect * prc_stirn_b # fertility effect
  ]
  person_days[, state_m := state + effects$noise * rnorm(nrow(person_days))]
  person_days[, state_m1 := state + effects$noise * rnorm(nrow(person_days))]
  person_days[, state_m2 := state + effects$noise * rnorm(nrow(person_days))]
  person_days[, state_m3 := state + effects$noise * rnorm(nrow(person_days))]
  person_days[, state_m4 := state + effects$noise * rnorm(nrow(person_days))]
  person_days[, state_m5 := state + effects$noise * rnorm(nrow(person_days))]
  person_days
}

Tidy a model for our purposes (get CIs and p.values)

tidy_model = function(fit, data) {
    library(broom)
  tidied = tidy(fit, effects = "fixed", conf.int = T, conf.method = "Wald")
  tidied$p.value = as.data.frame(summary(fit)$coef)[,'Pr(>|t|)']
  tidied$p.value_KR = as.data.frame(summary(fit)$coef, ddf = "Kenward-Roger")[,'Pr(>|t|)']
  tidied = tidied[tidied$term == 'fertile', ]
  tidied$term = NULL
  tidied$usable_days = sum(!is.na(data$fertile))
  tidied$usable_days = dplyr::n_distinct( data[!is.na(data$fertile), person] )

  tidied
}

Fit model

fit_model = function(data, predictor = "prc_stirn_b_m", formula = state_m ~ fertile + (1 | person)) {
    library(lme4); library(afex)
  data$fertile = data[, predictor, with = F]
    res = suppressMessages(mixed(formula, data = data, progress = FALSE))

    coefs = data.frame(res$anova_table, check.names = FALSE)
    fixefs = data.frame(summary(res$full.model)$coefficients, check.names = FALSE)
    coefs = coefs[rownames(coefs) == "fertile", ]
    fixefs = fixefs[rownames(fixefs) == "fertile", ]
    rownames(coefs) = NULL
    rownames(fixefs) = NULL
    coefs = dplyr::select(dplyr::rename(cbind(coefs, fixefs),
         p.value = `Pr(>F)`,
         std.error = `Std. Error`,
         statistic = `t value`,
         estimate = Estimate
         ), p.value, std.error, statistic, estimate)
    coefs
}

Define various researcher degrees of freedom

A: Stop early (10 and 20 participants earlier) if p<.05

rdf_optional_stopping = function(data, stop_early = 20, nest = fit_model, ...) {
  library(lme4); library(afex)

  person_n = max(data$person)
  if (stop_early > 0) {
    data_r = data[person <= (person_n - stop_early), ]
  } else {
    data_r = data
  }
  tidied = nest(data_r, ...)
  tidied$stopping.n = max(data_r$person)

  if (tidied$p.value >= 0.05 & tidied$stopping.n < person_n) {
    rdf_optional_stopping(data, stop_early - 10, nest = nest, ...)
  } else {
    tidied
  }
}

B: Try with an irrelevant covariate if p>.05

rdf_covariate = function(data, cov = 0, nest = fit_model, ...) {
  library(lme4); library(afex)
  if (cov == 0) {
    tidied = nest(data, formula = state_m ~ fertile + (1 | person), ...)
  } else {
    tidied = nest(data, formula = state_m ~ fertile + irrelevant_cov + (1 | person), ...)
  }
  tidied$covariate_used = cov

  if (tidied$p.value >= 0.05 & cov == 0) {
    rdf_covariate(data, 1, nest = nest, ...)
  } else {
    tidied
  }
}

C: Try up to 5 correlated items if p>.05

rdf_outcome = function(data, outcome = 1, nest = fit_model, ...) {
  library(lme4); library(afex)
  data$state_m = data[, paste0("state_m",outcome), with = F]

  tidied = nest(data, ...)
  tidied$outcome = outcome

  if (tidied$p.value >= 0.05 & outcome < 5) {
    rdf_outcome(data, outcome + 1, nest = nest, ...)
  } else {
    tidied
  }
}

D: Try continuous, broad and narrow window as predictors if p>.05

rdf_predictor = function(data, predictor_it = 1, nest = fit_model, ...) {
  library(lme4); library(afex)
  predictors = c("prc_stirn_b_m", "fertile_narrow_m", "fertile_broad_m")
  predictor = predictors[predictor_it]
  data$fertile = data[, predictor, with = F]

  tidied = nest(data, ...)
  tidied$predictor = predictor

  if (tidied$p.value >= 0.05 & predictor_it < 3) {
    rdf_predictor(data, predictor_it + 1, nest = nest, ...)
  } else {
    tidied
  }
}

No researcher degrees of freedom

rdf_none = function(data) {
  fit_model(data)
}

AB

rdf_outcome_covariate = function(data) {
  rdf_outcome(data, nest = rdf_covariate)
}

ABC

rdf_predictor_outcome_covariate = function(data) {
  rdf_predictor(data, nest = rdf_outcome_covariate)
}

ABCD

rdf_stopping_predictor_outcome_covariate = function(data) {
  rdf_optional_stopping(data, nest = rdf_predictor_outcome_covariate)
}

CD

rdf_stopping_predictor = function(data) {
  rdf_optional_stopping(data, nest = rdf_predictor)
}