Load packages
library(plyr); library(data.table); library(ggplot2); library(psych); library(broom)
Simulate data according to certain parameters
simulate_fertility_effect = function(
### settings
nr_of_people = 70,
nr_days = 35,
dayspan = 1:38,
miss_window = 4,
effects = list(
trait = 0.5,
fertility_effect = 0,
noise = 0.5
)
) {
#### simulate people
people = data.table(
person = 1:nr_of_people,
irrelevant_cov = rnorm(nr_of_people),
trait = rnorm(nr_of_people) # a stable component of the outcome
)
### cycle day reference
days = data.table(
rcd_day = c(29:1, 30:38),
FCD = c(1:38),
prc_stirn_b = c(.01, .01, .02, .03, .05, .09, .16, .27, .38, .48, .56, .58, .55, .48, .38, .28, .20, .14, .10, .07, .06, .04, .03, .02, .01, .01, .01, .01, .01, rep(.01, times = 9)),
# rep(.01, times = 70)), # gangestad uses .01 here, but I think such cases are better thrown than kept, since we might simply have missed a mens
prc_wcx_b = c(.000, .000, .001, .002, .004, .009, .018, .032, .050, .069, .085, .094, .093, .085, .073, .059, .047, .036, .028, .021, .016, .013, .010, .008, .007, .006, .005, .005, .005, rep(.005, times = 9))
)
days[, fertile_narrow := NA_real_] # most days we won't consider at all, because they are of uncertain fertility status
days[ between(rcd_day,15,19) , fertile_narrow := mean(prc_stirn_b, na.rm = T)] # these days are likely fertile
days[ between(rcd_day,4,12) , fertile_narrow := mean(prc_stirn_b, na.rm = T)] # these days are likely infertile
days[, fertile_broad := NA_real_] # most days we won't consider at all, because they are of uncertain fertility status
days[ between(rcd_day,14,22), fertile_broad := mean(prc_stirn_b, na.rm = T)] # these days are likely fertile
days[ between(rcd_day,4,12) , fertile_broad := mean(prc_stirn_b, na.rm = T)] # these days are likely infertile
#### simulate person days
i_length = sum(nr_days * nr_of_people)
person_days = data.table(row = 1:i_length, person = integer(i_length), days_taken_part = rep(nr_days, each = nr_days * nr_of_people))
setkey(person_days, row) # somehow gets reordered..
days_per_person = rep(nr_days, times = nr_of_people)
person_days$person = rep(people$person, each = nr_days)
which_days = function(day_nr) sample(dayspan, size = day_nr, replace = F)
person_days$rcd_day = unlist(lapply(days_per_person, FUN = which_days))
person_days = merge(person_days, people, by = "person")
person_days = merge(person_days, days, by = "rcd_day", all.x = T)
rcd_day_real = person_days$rcd_day
person_days[, rcd_day := rcd_day_real + rep(round(miss_window * rnorm(n = nr_days)), each = nr_of_people)]
person_days[ rcd_day < 1, rcd_day := 38L + rcd_day]
person_days[ rcd_day > 38L, rcd_day := rcd_day - 38L]
person_days = merge(person_days, days, by = "rcd_day", all.x = T, suffixes = c("","_m"))
person_days[, rcd_day_m := rcd_day] # swap in
person_days[, rcd_day := rcd_day_real] # swap back
setkey(person_days, row) # somehow gets reordered..
person_days[, fertile := prc_stirn_b_m]
person_days[, state :=
effects$trait * trait + # trait
effects$fertility_effect * prc_stirn_b # fertility effect
]
person_days[, state_m := state + effects$noise * rnorm(nrow(person_days))]
person_days[, state_m1 := state + effects$noise * rnorm(nrow(person_days))]
person_days[, state_m2 := state + effects$noise * rnorm(nrow(person_days))]
person_days[, state_m3 := state + effects$noise * rnorm(nrow(person_days))]
person_days[, state_m4 := state + effects$noise * rnorm(nrow(person_days))]
person_days[, state_m5 := state + effects$noise * rnorm(nrow(person_days))]
person_days
}
tidy_model = function(fit, data) {
library(broom)
tidied = tidy(fit, effects = "fixed", conf.int = T, conf.method = "Wald")
tidied$p.value = as.data.frame(summary(fit)$coef)[,'Pr(>|t|)']
tidied$p.value_KR = as.data.frame(summary(fit)$coef, ddf = "Kenward-Roger")[,'Pr(>|t|)']
tidied = tidied[tidied$term == 'fertile', ]
tidied$term = NULL
tidied$usable_days = sum(!is.na(data$fertile))
tidied$usable_days = dplyr::n_distinct( data[!is.na(data$fertile), person] )
tidied
}
fit_model = function(data, predictor = "prc_stirn_b_m", formula = state_m ~ fertile + (1 | person)) {
library(lme4); library(afex)
data$fertile = data[, predictor, with = F]
res = suppressMessages(mixed(formula, data = data, progress = FALSE))
coefs = data.frame(res$anova_table, check.names = FALSE)
fixefs = data.frame(summary(res$full.model)$coefficients, check.names = FALSE)
coefs = coefs[rownames(coefs) == "fertile", ]
fixefs = fixefs[rownames(fixefs) == "fertile", ]
rownames(coefs) = NULL
rownames(fixefs) = NULL
coefs = dplyr::select(dplyr::rename(cbind(coefs, fixefs),
p.value = `Pr(>F)`,
std.error = `Std. Error`,
statistic = `t value`,
estimate = Estimate
), p.value, std.error, statistic, estimate)
coefs
}
rdf_optional_stopping = function(data, stop_early = 20, nest = fit_model, ...) {
library(lme4); library(afex)
person_n = max(data$person)
if (stop_early > 0) {
data_r = data[person <= (person_n - stop_early), ]
} else {
data_r = data
}
tidied = nest(data_r, ...)
tidied$stopping.n = max(data_r$person)
if (tidied$p.value >= 0.05 & tidied$stopping.n < person_n) {
rdf_optional_stopping(data, stop_early - 10, nest = nest, ...)
} else {
tidied
}
}
rdf_covariate = function(data, cov = 0, nest = fit_model, ...) {
library(lme4); library(afex)
if (cov == 0) {
tidied = nest(data, formula = state_m ~ fertile + (1 | person), ...)
} else {
tidied = nest(data, formula = state_m ~ fertile + irrelevant_cov + (1 | person), ...)
}
tidied$covariate_used = cov
if (tidied$p.value >= 0.05 & cov == 0) {
rdf_covariate(data, 1, nest = nest, ...)
} else {
tidied
}
}
rdf_predictor = function(data, predictor_it = 1, nest = fit_model, ...) {
library(lme4); library(afex)
predictors = c("prc_stirn_b_m", "fertile_narrow_m", "fertile_broad_m")
predictor = predictors[predictor_it]
data$fertile = data[, predictor, with = F]
tidied = nest(data, ...)
tidied$predictor = predictor
if (tidied$p.value >= 0.05 & predictor_it < 3) {
rdf_predictor(data, predictor_it + 1, nest = nest, ...)
} else {
tidied
}
}
rdf_none = function(data) {
fit_model(data)
}
rdf_outcome_covariate = function(data) {
rdf_outcome(data, nest = rdf_covariate)
}
rdf_predictor_outcome_covariate = function(data) {
rdf_predictor(data, nest = rdf_outcome_covariate)
}
rdf_stopping_predictor_outcome_covariate = function(data) {
rdf_optional_stopping(data, nest = rdf_predictor_outcome_covariate)
}
rdf_stopping_predictor = function(data) {
rdf_optional_stopping(data, nest = rdf_predictor)
}