Helper functions used throughout

documentation on the functions is interspersed through code comments

set some options

library = function (...) suppressMessages(base::library(...))

options(stringsAsFactors = FALSE)
options(digits = 4)
options(scipen = 7)
options(width = 110)

Load packages

library(rmarkdown); library(knitr); library(formr); library(data.table); library(stringr); library(ggplot2); library(plyr);  library(psych); library(lubridate); library(car); library(psych); library(lme4);library(lmerTest); library(sjPlot); library(dplyr); library(dtplyr); library(tidyr);
library(svglite); library(knitr)
library(ggplot2); library(ggthemes); library(extrafont)
library(pander)

opts_chunk$set(dev = "png")

fool_packrat = function() {
  library(formatR)
  library(foreach)
  library(KernSmooth)
  library(GPArotation)
  library(nlme)
  library(devtools)
}

Package options

I’m tired of data table messing up my rmarkdown. Never want to print this in a live report anyway, I’m using pander for that.

options(datatable.print.nrows=0)
options(dplyr.print_max = 0)
options(dplyr.print_min = 0)
options(rmarkdown.df_print = FALSE)

always use these plot settings throughout

theme_set(theme_tufte(base_size = 20, base_family='Helvetica Neue'))

Auto-adjust the table column alignment depending on data type.

alignment = function (...) UseMethod('alignment')
alignment.default = function (...) 'left'
alignment.integer = function (...) 'right'
alignment.numeric = function (...) 'right'
panderOptions("table.split.table", Inf)

Helper functions

clean pipes

`%->%` = function(val, var) { assign(deparse(substitute(var)),val, envir = parent.frame()); invisible(val) }

a shorthand to quickly find objects of a certain class

ls_type = function(types=c('lmerMod','glmerMod','bglmerMod','blmerMod','merModLmerTest'), envir = parent.frame(), ...){
  inlist <- ls(envir = envir, ...)
  ifexistsgetclass = function(x, envir) {
    if(exists(x, envir = envir, inherits = FALSE)) {
      bla = get(x, envir = envir, inherits = FALSE)
      class(bla)
    } else {
      NULL
    }
  }
  classlist <- sapply(inlist,function(x) ifexistsgetclass(x, envir = envir))
  if (length(classlist) > 0 ) {
    names(classlist[
      sapply(classlist,function(x){ !!length(intersect(x, types) ) }) ## take any who have a class that is in our list
      ])
  } else character(0)
}

cut in with a printed message

and pass the object along

cat_in = function(obj, cat) {
  cat(cat)
  invisible(obj)
}

display error messages in HTML

don’t interrupt during iterated model fitting

cat_message = function(cat, class = "danger") {
  if (class == "danger") {
    prob = "Error"
  } else {
    prob = "Information"
  }
  cat(paste0("\n\n<div class='alert alert-",class,"'><strong>",prob,":</strong> ", cat, "</div>\n\n"))
}

print summary and pass on obj

print_summary = function(obj) {
  cat("\n\n```\n")
  print(summary(obj))
  cat("\n```\n\n")
  invisible(obj)
}

print confint and pass on obj

print_confint = function(obj) {
  print(confint(obj))
  invisible(obj)
}

calculate effects and pass obj

store them in attributes so that other functions can access

calculate_effects = function(obj, partial.residuals = F) {
  library(effects)
  if (length(attributes(obj)$effects) == 0) {
    attributes(obj)$effects = allEffects(obj, xlevels = 3, partial.residuals = partial.residuals)
  }
  invisible(obj)
}

plot (previously stored) effects

plot_all_effects = function(obj, multiline=F, partial.residuals = F) {
  library(effects)
  if(length(attributes(obj)$effects) == 0) {
    attributes(obj)$effects = allEffects(obj, xlevels = 3, partial.residuals = partial.residuals)
  }
  plot(attributes(obj)$effects,multiline=multiline, rug = F)
  invisible(obj)
}

substitute my own lme tidier

my_tidy.lme = function(fit, effects = "fixed") {
  if (effects == "fixed") {
    confintervals = data.frame(nlme::intervals(fit)$fixed)
    confintervals$term = rownames(confintervals)
    mydf = confintervals[confintervals$term != "(Intercept)", ]
    names(mydf) = c("conf.low", "estimate", "conf.high", "term")
    pvals = data.frame(nlme:::summary.lme(fit)$tTable)
    names(pvals) = c("estimate","std.error","df","statistic","p.value")
    pvals$term = rownames(pvals)
    mydf = merge(mydf, pvals[, c("term","p.value","std.error","statistic")], by = 'term')

    rownames(mydf) = NULL
    mydf = mydf[, c("term", "estimate", "std.error", "statistic", "p.value", "conf.low","conf.high")]
    mydf
  } else {
    stop("Only fixed effects.")
  }
}

Plot sjp interaction

plot_sjp_int = function(obj) {
  library(effects)
  library(sjPlot)
  sjp_eff = sjp.int(obj, type = 'eff', showCI = T, plevel=1, printPlot = F, moderatorValues = 'minmax' )
  sjp_plot = sjp_eff$plot.list[[1]]
  print(sjp_plot)
  invisible(obj)
}

Overwrite function for old apiversion

formr_items = function (survey_name, host = "https://formr.org")
{
  resp = httr::GET(paste0(host, "/admin/survey/", survey_name,
                          "/export_item_table?format=json"))
  if (resp$status_code == 200) {
    item_list = jsonlite::fromJSON(simplifyDataFrame = FALSE,
                                   httr::content(resp, encoding = "utf8", as = "text"))
    for (i in seq_along(item_list)) {
      if (item_list[[i]]$type == "rating_button") {
        from = 1
        to = 5
        by = 1
        if (!is.null(item_list[[i]]$type_options)) {
          sequence = stringr::str_split(item_list[[i]]$type_options,
                                        ",")[[1]]
          if (length(sequence) == 3) {
            from = as.numeric(sequence[1])
            to = as.numeric(sequence[2])
            by = as.numeric(sequence[3])
          }
          else if (length(sequence) == 2) {
            from = as.numeric(sequence[1])
            to = as.numeric(sequence[2])
          }
          else if (length(sequence) == 1) {
            to = as.numeric(sequence[1])
          }
        }
        sequence = seq(from, to, by)
        names(sequence) = sequence
        sequence[1] = item_list[[i]]$choices[[1]]
        sequence[length(sequence)] = item_list[[i]]$choices[[2]]
        item_list[[i]]$choices = as.list(sequence)
      }
    }
    class(item_list) = c("formr_item_list", class(item_list))
    item_list
  }
  else stop("This survey does not exist.")
}
assignInNamespace("formr_items",formr_items,ns="formr")

n excluded

count how many more are set to false (compared to internal counter)

excluded_old = 0
n_excluded = function(x) {
  excluded_new = sum(is.na(x) | x == FALSE,na.rm = T)
  if (is.null(excluded_old)) {
    excluded = excluded_new
  } else {
    excluded = excluded_new - excluded_old
  }
  cat(excluded, "excluded\n")
  excluded_old <<- excluded_new
  excluded
}

recode a var into a properly ordered factor

recode_ordered = function(levels, var) {
  factor(var, levels = sort(unique(var)), labels = levels[sort(unique(var))])
}

make a bar plot with counts and %ages

bar_count = function(data, variable, na.rm = FALSE) {
  varname = deparse(substitute(variable))
  var = data %>% select_(varname) %>% .[[1]]
  if (na.rm == T) {
    var = var %>% na.omit()
  }
  var = factor(var, exclude = NULL)
  data$var = var

  ggplot(data, aes(x = var)) +
    geom_bar() +
    stat_count(aes(label = paste(..count.., "\n", scales::percent(round(..count../sum(count),2)))), hjust = -0.1, geom = "text", position = "identity", na.rm = T) +
    scale_y_continuous(expand = c(0.1, 0)) +
    xlab(varname) +
    coord_flip()
}

fixed sqrt transformation for ggplot

from https://github.com/hadley/ggplot2/issues/980

mysqrt_trans <- function() {
  library(scales)
  domain <- c(0, Inf)
  transform <- base::sqrt
  range <- transform(domain)
  trans_new("mysqrt",
            transform = transform,
            inverse = function(x) squish(x, range=range)^2,
            domain = domain)
}

curve plot

plot residuals/raw values over reverse cycle days relative to ovulation

plot_curve = function(model, diary, caption_x = "Days until next menstruation") {
  outcome = names(model@frame)[1]
  options = list(fig.path = paste0(knitr::opts_chunk$get("fig.path"), outcome, "-curve-"),
                 cache.path = paste0(knitr::opts_chunk$get("cache.path"), outcome, "-curve-"))
  asis_knit_child("_plot_curve.Rmd", options = options)
}

moderated curve

plot_moderated_curve = function(obj, diary, moderator, modlabel, include = c("cycling")) {
  outcome = names(obj@frame)[1]
  new_form = update.formula(formula(obj), new = . ~ . - fertile * included )
  library(ggplot2)
  options(warn = -1)

  diary %>%
    filter(!is.na(included)) %>%
    mutate(.,residuals = residuals(update(obj, new_form, data = ., na.action = na.exclude))
    ) %>%
    filter(!is.na(RCD_rel_to_ovulation) & RCD_rel_to_ovulation <= 15 & RCD_rel_to_ovulation > -15, !is.na(residuals)) ->
    tmp
  # tmp$real = FALSE
  tmp_before = tmp
  tmp_before$RCD_rel_to_ovulation = tmp_before$RCD_rel_to_ovulation - 30
  tmp_after = tmp
  tmp_after$RCD_rel_to_ovulation = tmp_after$RCD_rel_to_ovulation + 30
  # tmp$real = TRUE
  tmp = rbind(tmp_before, tmp, tmp_after)

  tryCatch({
    trend_plot = ggplot(tmp,aes_string(x = "RCD_rel_to_ovulation", y = "residuals", colour = moderator)) + stat_smooth(geom = 'smooth',size = 0.8, fill = "#9ECAE1", method = 'gam', formula = y ~ s(x))
  }, error = function(e){cat_message(e, "danger")})
  tryCatch({
    trend_data = ggplot_build(trend_plot)$data[[1]]
  }, error = function(e){cat_message(e, "danger")})

  trend_data$RCD_rel_to_ovulation = round(trend_data$x)
  trend_data = merge(trend_data, unique(data.frame(diary[, list(RCD_rel_to_ovulation,fertile_cont)])), by = "RCD_rel_to_ovulation", all.x = T)

  trend_data %>%
    filter(RCD_rel_to_ovulation > -15 & RCD_rel_to_ovulation <= 15) %>%
    mutate(superimposed = ( ( (fertile_cont - 0.01)/0.58) * (max(y)-min(y) ) ) + min(y) ) ->
    trend_data

  trend_data$mod = factor(trend_data$group)
  levels(trend_data$mod) = levels(model.frame(as.formula(paste0("~",moderator)), diary)[, 1])
  ymax = max(trend_data$ymax)
  ymin = min(trend_data$ymin)

  plot = ggplot(trend_data) +
    geom_smooth(aes(x = x, y = y, ymin = ymin,ymax = ymax, colour = mod, fill = mod), size = 0.8, stat = "identity", alpha = 0.2) +
    scale_x_continuous("Cycle days relative to estimated day of ovulation (at 0)",limits=c(-14.4,15.5)) +
    geom_line(aes(x= x, y = superimposed), color = "#a83fbf", size = 1, linetype = 'dashed') +
    annotate("text",x = -2,  y = max(trend_data$superimposed,na.rm=T) * 1.2, label = 'superimposed fertility peak', color = "#a83fbf") +
    scale_y_continuous(outcome, limits = c(ymin, ymax)) +
    ggtitle("Smoothed") +
    scale_color_hue(modlabel) +
    scale_fill_hue(modlabel)
  print(plot)
  options(warn = 0)

  invisible(obj)
}


plot_triptych = function(obj, x.var = 'fertile', multiline=F, partial.residuals = F, xlevels = 3, term = NULL, panel_rows = 2) {
  library(effects)
  if(length(attributes(obj)$effects) == 0) {
    attributes(obj)$effects = allEffects(obj, xlevels = xlevels, partial.residuals = partial.residuals)
  }
  if(is.null(term)) {
    interaction = attributes(obj)$effects[
      which.max(stringr::str_length(names(attributes(obj)$effects)))
      ]
  } else {
    interaction = attributes(obj)$effects[ names(attributes(obj)$effects) == term ]
  }
  if (multiline) {
    panel_rows = 1
    colors = c("red","black")
  } else {
    colors = "black"
  }
  layout = c(xlevels,panel_rows,1)
  plot(interaction,x.var = x.var, multiline = multiline, layout = layout, colors = colors, rug = F)
  invisible(obj)
}

Intra-individual correlations meta-analysed for Kelly

compute_intra_individual_correlations = function(obj, diary, corrs) {
  library(dplyr)
  library(compute.es)
  library(metafor)
  outcome = as.formula(paste("~ ", as.character((formula(obj)[2]))))
  diary %>%
    filter(include_lax == T) %>%
    mutate_(outcome = outcome) %>%
    group_by(person) %>%
    summarise(days = n(), correlation = cor(outcome, fertile_cont, use ='pairwise.complete.obs')) %>%
    mutate(cohens.d = res(correlation, n = days, verbose = F)$d,
           hedges.g = des(cohens.d, n.1 = floor(days/2), n.2 = ceiling(days/2), verbose= F)$g) %->%
    correlations %>%
    escalc(measure = "COR", ri = correlation, ni = days, data = .) %->%
    hedges
  rma(hedges$yi, hedges$vi) %->%
    meta_anal

  correlations$outcome = as.character((formula(obj)[2]))
  corrs <<- rbind(corrs, select(correlations, outcome, person, days, correlation))
  forest(meta_anal)
  print(meta_anal)
  invisible(obj)
}

Switch window to broad window

switch_window_to_broad = function(obj, diary) {
  tryCatch({
    library(lmerTest)
    outcome = names(obj@frame)[1]
    if (diary %>% ungroup() %>%  select( one_of(outcome) ) %>% unique() %>% na.omit() %>% nrow() == 2) {
        form = formula(obj)
        glmer(form, data = diary2, family = binomial(link = 'probit')) ->
        broad_window
    } else {
      form = formula(obj)
      lmer(form, data = diary2) ->
      broad_window
    }

  }, error = function(e) { cat_message(e, "danger") })
  broad_window %>%
    print_summary() %>%
    plot_all_effects() %>%
    cat_in("\n\n\n#### Diagnostics {.accordion}\n\n") %>%
    print_diagnostics()
  invisible(broad_window)
}

Adjust for self esteem

adjust_for_self_esteem = function(obj, diary) {
  tryCatch({
    library(lmerTest)
    outcome = names(obj@frame)[1]
    form = update.formula(formula(obj), . ~ . + self_esteem_1)
    if (diary %>% ungroup() %>% select( one_of(outcome) ) %>% unique() %>% nrow() == 2) {
      glmer(form, data = diary, family = binomial(link = 'probit')) ->
        adj_self_esteem
    } else {
      lmer(form, data = diary) ->
        adj_self_esteem
    }

  }, error = function(e) { cat_message(e, "danger") })
  adj_self_esteem %>%
    print_summary() %>%
    plot_all_effects()
  invisible(obj)
}

Robustness analyses

robustness_check_ovu_shift = function(obj, diary) {
  library(lme4); library(lmerTest); library(sjPlot)
  get_coefs = function(fit, model_name) {
    library(broom)
    if (class(fit) == "lme") {
      obj_coef = tryCatch({my_tidy.lme(fit, effects = 'fixed') }, error = function(e) { cat_message(e, "danger") })
    } else {
      obj_coef = tryCatch({tidy(fit, conf.int = TRUE, effects = "fixed")}, error = function(e) { cat_message(e, "danger") })
    }
    obj_coef$model = model_name
    obj_coef
  }
  outcome = as.character(formula(obj)[2])
  outcome_label = recode(str_replace_all(str_replace_all(str_replace_all(outcome, "_", " "), " pair", "-pair"), " 1", ""),
                         "desirability" = "self-perceived desirability",
                         "NARQ admiration" = "narcissistic admiration",
                         "NARQ rivalry" = "narcissistic rivalry",
                         "extra-pair" = "extra-pair desire & behaviour",
                         "had sexual intercourse" = "sexual intercourse")


  less_ctrl_formula = update.formula(formula(obj), new = . ~ . - included * menstruation - fertile_mean)
  tryCatch({
    update(obj, formula = less_ctrl_formula) -> fewer_controls
  }, error = function(e) { cat_message(e, "danger") })

  tryCatch({
    one_ctrl_formula = update.formula(formula(obj), new = . ~ . - fertile_mean)
    update(obj, formula = one_ctrl_formula) -> dontcontrolavg
  }, error = function(e) { cat_message(e, "danger") })

  tryCatch({
    diary_nododgy = diary %>% filter(dodgy_data == F)
    update(obj, data = diary_nododgy) -> nododgy
  }, error = function(e) { cat_message(e, "danger") })

  tryCatch({
  diary_broad = diary %>% mutate(fertile = fertile_broad)
  update(obj, formula = less_ctrl_formula, data = diary_broad) -> broad_window
}, error = function(e) { cat_message(e, "danger") })

  tryCatch({
    diary_broad = diary %>% mutate(fertile = fertile_broad)
    update(obj, data = diary_broad) -> broad_window_ctrl
  }, error = function(e) { cat_message(e, "danger") })

  tryCatch({
    diary_bci = diary %>% mutate(fertile = prc_stirn_b_backward_inferred)
    update(obj, formula = less_ctrl_formula, data = diary_bci) -> backward_inferred
  }, error = function(e) { cat_message(e, "danger") })

tryCatch({
  diary_narrow = diary %>% mutate(fertile = fertile_narrow)
  update(obj, formula = less_ctrl_formula, data = diary_narrow) -> narrow_window
}, error = function(e) { cat_message(e, "danger") })

tryCatch({
    diary_cont_fc = diary %>% mutate(fertile = prc_stirn_b_forward_counted)
    update(obj, data = diary_cont_fc) -> forward_counting
}, error = function(e) { cat_message(e, "danger") })

tryCatch({
  diary_narrow_fc = diary %>% mutate(fertile = fertile_narrow_forward_counted)
  update(obj, formula = less_ctrl_formula, data = diary_narrow_fc) -> forward_counting_narrow
}, error = function(e) { cat_message(e, "danger") })

  tryCatch({
    diary_broad_fc = diary %>% mutate(fertile = fertile_broad_forward_counted)
    update(obj, formula = less_ctrl_formula, data = diary_broad_fc) -> forward_counting_broad
  }, error = function(e) { cat_message(e, "danger") })

  tryCatch({
    diary_cont = diary %>% mutate(fertile = prc_stirn_b)
    update(obj, data = diary_cont) -> backward
  }, error = function(e) { cat_message(e, "danger") })

# tryCatch({
#   diary %>% mutate(fertile = prc_wcx_b) %>%
#     update(obj, data = .) -> wilcox
# }, error = function(e) { cat_message(e, "danger") })

tryCatch({
  ilax = diary %>% mutate(included = included_lax)
  update(obj, data = ilax) -> included_lax
}, error = function(e) { cat_message(e, "danger") })

tryCatch({
  if(outcome != 'self_esteem_1') {
    update(obj, formula = update.formula(formula(obj), new = . ~ . + self_esteem_1)) -> control_self_esteem
  }
}, error = function(e) { cat_message(e, "danger") })

tryCatch({
  iconser = diary %>% mutate(included = included_conservative)
  update(obj, data = iconser) -> included_conservative
}, error = function(e) { cat_message(e, "danger") })


tryCatch({
  istrict = diary %>% mutate(included = included_strict)
  update(obj, data = istrict) -> included_strict
}, error = function(e) { cat_message(e, "danger") })

  tryCatch({
    no_short = diary %>% filter(is.na(cycle_length_diary) | cycle_length_diary >= 20)
    update(obj, data = no_short) -> no_cycles_shorter_than_20
  }, error = function(e) { cat_message(e, "danger") })

  tryCatch({
    no_long = diary %>% filter(is.na(cycle_length_diary) | (cycle_length_diary >= 20 & cycle_length_diary <= 40))
    update(obj, data = no_long) -> cycles_between_20_40
  }, error = function(e) { cat_message(e, "danger") })

  tryCatch({
    update(obj, formula = update.formula(formula(obj), new = . ~ . + weekday + week_number)) -> control_week
  }, error = function(e) { cat_message(e, "danger") })

  tryCatch({
    update(obj, formula = update.formula(formula(obj), new = . ~ . + time_of_response + log10(time_for_response+1))) -> control_time_of_response
  }, error = function(e) { cat_message(e, "danger") })


  tryCatch({
    diary %>%
      mutate(fertile = prc_stirn_b_forward_counted) %>%
      filter(!is.na(fertile) & !is.na(included) & menstruation == "no") %>%
      filter_(lazyeval::interp( ~ !is.na(outcome), outcome = as.name(obj@frame %>% names() %>% .[1]))) %>%
      filter(!duplicated(person)) -> diary_tmp2
      form = update.formula(formula(obj), new = . ~ . - fertile_mean - menstruation - included:menstruation - (1 | person))
      environment(form) = environment()
      lm(formula = form, data = diary_tmp2) -> between_person
  }, error = function(e) { cat_message(e, "danger") })


  tryCatch({
    four_occ = diary %>%
      mutate(fertile = fertile_narrow_forward_counted,
             person_occasion = paste(person, fertile)) %>%
      filter(!is.na(fertile) & !is.na(included) & menstruation == "no") %>%
      filter_(lazyeval::interp( ~ !is.na(outcome), outcome = as.name(obj@frame %>% names() %>% .[1]))) %>%
      mutate(row_nr_even = row_number(person) %% 2,
             person_occasion = paste(person_occasion, row_nr_even)) %>%
      filter(!duplicated(person_occasion)) %>%
      group_by(person) %>%
      mutate(nr_days = n()) %>%
      filter(nr_days == 4)

    update(obj, formula = update.formula(formula(obj), new = . ~ . - fertile_mean - included:menstruation - menstruation) , data = four_occ) -> within_person_four_occasions
  }, error = function(e) { cat_message(e, "danger") })

  tryCatch({
    avg_twotime = diary %>%
      mutate(fertile = fertile_broad) %>%
      filter(!is.na(fertile) & !is.na(included) & menstruation == "no") %>%
      select(person, fertile, included, one_of(outcome)) %>%
      group_by(included, person, fertile) %>%
      summarise_all(funs(mean(.,na.rm = T)))

    update(obj, formula = less_ctrl_formula , data = avg_twotime) -> within_person_avg_hilow
  }, error = function(e) { cat_message(e, "danger") })


  tryCatch({
    two_occ = diary %>%
      mutate(fertile = fertile_narrow_forward_counted,
             person_occasion = paste(person, fertile)) %>%
      filter(!is.na(fertile) & !is.na(included) & menstruation == "no") %>%
      filter_(lazyeval::interp( ~ !is.na(outcome), outcome = as.name(obj@frame %>% names() %>% .[1]))) %>%
      filter(!duplicated(person_occasion)) %>%
      group_by(person) %>%
      mutate(nr_days = n()) %>%
      filter(nr_days == 2)

      update(obj, formula = update.formula(formula(obj), new = . ~ . - fertile_mean - included:menstruation - menstruation) , data = two_occ) -> within_person_two_occasions
  }, error = function(e) { cat_message(e, "danger") })


  tryCatch({
    # findbars(formula(obj))[[1]]
    if (class(obj) != "glmerMod") {
      nlme::lme(fixed = nobars(formula(obj)), random = ~ 1 | person, data = diary, na.action = na.exclude, correlation = nlme::corAR1(form = ~ day_number | person), method = "REML") -> control_autocorrelation
    control_autocorrelation$call$fixed = control_autocorrelation$terms
    }
  }, error = function(e) { cat_message(e, "danger") })

  tryCatch({
    if (class(obj) != "glmerMod") {
      nlme::lme(fixed = nobars(formula(obj)), random = ~ 1 | person, data = diary, na.action = na.exclude, correlation = nlme::corARMA(form = ~ day_number | person, p = 1, q = 1), method = "REML") -> autocorrelation_moving_avg
    autocorrelation_moving_avg$call$fixed = autocorrelation_moving_avg$terms
    }
  }, error = function(e) { cat_message(e, "danger") })


  models_in_function = ls_type(c('lmerMod','glmerMod','bglmerMod','blmerMod','merModLmerTest','lme','lm'))
  coefs = rbindlist(lapply(models_in_function, FUN = function(x) { get_coefs(get(x), x) }), fill = TRUE)
  # eff_coefs = rbindlist(lapply(models_in_function, FUN = function(x) { get_eff_coefs(get(x), x) }), fill = TRUE)

  coefs$model  =  dplyr::recode_factor(factor(coefs$model),
  'obj' = 'M_1. Main model (all), BC+BCi',
  'included_lax' = 'M_e2. Lax exclusion criteria',
  'included_conservative' = 'M_e3. Conservative exclusion criteria',
  'included_strict' = 'M_e4. Strict exclusion criteria',
  'nododgy' = 'M_e5. Exclude potentially dodgy data',

  'backward' = 'M_p1. Continuous, BC',
  'forward_counting' = 'M_p2. Continuous, FC',
  'broad_window' = 'M_p3. Broad window, BC',
  'narrow_window' = 'M_p4. Narrow window, BC',
  'forward_counting_narrow' = 'M_p5. Narrow window, FC',
  'forward_counting_broad' = 'M_p6. Broad window, FC',
  'backward_inferred' = 'M_p7. BC from rep. cycle length, when onset unknown',
  'cycles_between_20_40' = 'M_p8. Only cycles 20-40 days long',
  'within_person_avg_hilow' = 'M_p9. Within, BC, average high/low all',

  'between_person' = 'M_d1. Between, FC (first obs. only)',
  'within_person_two_occasions' = 'M_d2. Within, FC, high/low 2 obs.',
  'within_person_four_occasions' = 'M_d3. Within, FC, two high/low 4 obs.',
  'no_cycles_shorter_than_20' = 'M_d4. No cycles shorter than 20 days',

    'control_self_esteem' = 'M_c1. Adjust for self esteem',
  'dontcontrolavg' = 'M_c2. No adjustment for avg. fertility',
  'fewer_controls' = 'M_c3. No adjustment for menstruation & avg. fertility',
  'control_week' = 'M_c4. Control week day and number',
  'control_time_of_response' = 'M_c5. Adjust for time of/for response',
  'control_autocorrelation' = 'M_c6. Model autocorrelation, lag 1',
  'autocorrelation_moving_avg' = 'M_c7. Model autocorrelation, moving avg., lag 1',
  'broad_window_ctrl' = 'M_c8. Broad BC, adj. menstruation', .ordered = T)
  coefs$model = factor(coefs$model, levels =  rev(levels(coefs$model)))
  coefs = coefs %>% mutate(term = recode(term, "fertile:includedhorm_contra" = "includedhorm_contra:fertile"))
  # eff_coefs$model = factor( car::Recode(eff_coefs$model, modeltranslate ))
  coefs_all = coefs
  coefs = coefs %>% filter(term == "fertile" | term == "includedhorm_contra:fertile")

  cat("\n\n\n#### M_e: Exclusion criteria \n\n")

  plot =  ggplot(coefs %>% filter(model %begins_with% "M_e" | model %begins_with% "M_1. "), aes(x = model, y = estimate, ymax = conf.high, ymin = conf.low, colour = term), group = 1) +
    geom_hline(yintercept = 0, linetype = "dotted", color = "gray70") +
    geom_text(aes(label = round(estimate,2), y = estimate),  position = position_dodge(width = 0.6), vjust = -0.7) +
    geom_pointrange( position = position_dodge(width = 0.6), size = 1) +
    scale_color_manual("Contraception status", values = c("includedhorm_contra:fertile"="black","fertile" = "red"), labels = c("includedhorm_contra:fertile"="hormonally\ncontracepting","fertile" = "fertile"), guide = F) +
    xlab("Model") +
    ylab(paste("Regression slope + 95 CI%")) +
    ggtitle(outcome_label) +
    coord_flip()
    print(plot)

    # cat("\n\n\n")
    # coefs_all %>%
    #   filter(model %begins_with% "M_e" | model %begins_with% "M_1. ") %>%
    #   select(model, term, estimate, conf.low, conf.high) %>%
    #   pander() %>%
    #   cat()

    cat("\n\n\n#### M_p: Predictors \n\n")
    plot =  ggplot( coefs %>% filter(model %begins_with% "M_p" | model %begins_with% "M_1. "), aes(x = model, y = estimate, ymax = conf.high, ymin = conf.low, colour = term), group = 1) +
      geom_hline(yintercept = 0, linetype = "dotted", color = "gray70") +
      geom_text(aes(label = round(estimate,2), y = estimate),  position = position_dodge(width = 0.6), vjust = -0.7) +
      geom_pointrange( position = position_dodge(width = 0.6), size = 1) +
      scale_color_manual("Contraception status", values = c("includedhorm_contra:fertile"="black","fertile"= "red"), labels = c("includedhorm_contra:fertile"="hormonally\ncontracepting","fertile"="fertile"), guide = F) +
      xlab("Model") +
      ylab(paste("Regression slope + 95 CI%")) +
      ggtitle(outcome_label) +
      coord_flip()
    print(plot)
#
    cat("\n\n\n#### M_c: Covariates, controls, autocorrelation \n\n")

    plot =  ggplot(coefs %>% filter(model %begins_with% "M_c" | model %begins_with% "M_1. "), aes(x = model, y = estimate, ymax = conf.high, ymin = conf.low, colour = term), group = 1) +
      geom_hline(yintercept = 0, linetype = "dotted", color = "gray70") +
      geom_text(aes(label = round(estimate,2), y = estimate),  position = position_dodge(width = 0.6), vjust = -0.7) +
      geom_pointrange( position = position_dodge(width = 0.6), size = 1) +
      scale_color_manual("Contraception status", values = c("includedhorm_contra:fertile"="black","fertile"= "red"), labels = c("includedhorm_contra:fertile"="hormonally\ncontracepting","fertile"="fertile"), guide = F) +
      xlab("Model") +
      ylab(paste("Regression slope + 95 CI%")) +
      ggtitle(outcome_label) +
      coord_flip()
    print(plot)

    tryCatch({
      if (class(obj) != "glmerMod") {
        print_summary(control_autocorrelation)
        print_summary(autocorrelation_moving_avg)
      } else {
        cat_message("No AR1/ARMA autocorrelation models were fitted for binomial outcomes.", "info")
      }
    }, error = function(e) { cat_message(e, "danger") })

    cat("\n\n\n#### M_d: Other designs \n\n")
    plot =  ggplot( coefs %>% filter(model %begins_with% "M_d" | model %begins_with% "M_1. "), aes(x = model, y = estimate, ymax = conf.high, ymin = conf.low, colour = term), group = 1) +
      geom_hline(yintercept = 0, linetype = "dotted", color = "gray70") +
      geom_text(aes(label = round(estimate,2), y = estimate),  position = position_dodge(width = 0.6), vjust = -0.7) +
      geom_pointrange( position = position_dodge(width = 0.6), size = 1) +
      scale_color_manual("Contraception status", values = c("includedhorm_contra:fertile"="black","fertile"= "red"), labels = c("includedhorm_contra:fertile"="hormonally\ncontracepting","fertile"="fertile"), guide = F) +
      xlab("Model") +
      ylab(paste("Estimate of effect on", outcome)) +
      coord_flip()
    print(plot)

    cat("\n\n\n#### _M_m1_: Moderation by contraceptive method \n\n
Based on the sample with lax exclusion criteria. Users who used any hormonal contraception are classified as hormonal, users who use any awareness-based methods (counting, temperature-based) are classified as 'fertility-awareness', women who don't fall into the before groups and use condoms, pessars, coitus interruptus etc. are classified as 'barrie or abstinence'. Women who don't use contraception or use other methods such as sterilisation are excluded from this analysis.
        \n\n")

    tryCatch({

      update(obj, formula = . ~ . - included * (menstruation + fertile) + contraceptive_methods + (fertile + menstruation) + included:(fertile + menstruation),
             data = diary, subset = !is.na(included_lax) & contraceptive_method != "other") ->  add_main

            update(obj, formula = . ~ . - included * (menstruation + fertile) + contraceptive_methods * (fertile + menstruation),
             data = diary, subset = !is.na(included_lax) & contraceptive_method != "other") -> by_method

      method_eff_coefs = sjp.int(by_method, type = "eff", showCI = F, printPlot = F)$data.list[[1]]
      rec = c("hormonal" = "hormonally\ncontracepting","barrier_or_abstinence" = "only barrier\n(condoms, ...)\nor abstinence", "fertility_awareness" = "potentially\nfertility aware", "none" = "not using contraception")
      method_eff_coefs$method = rec[as.character(method_eff_coefs$grp)]
      eff_plot =  ggplot(method_eff_coefs,
                         aes(x = x, y = y, ymax = conf.high, ymin = conf.low)) +
        geom_smooth( size = 1, stat = "identity", color = 'black') +
        xlab("Conception probability") +
        facet_wrap(~ method) +
        ylab(outcome)
      print(eff_plot)

      coefs = get_coefs(by_method, "by method")  %>% filter(term != "(Intercept)")
      plot = ggplot(coefs, aes(x = term, y = estimate, ymax = conf.high, ymin = conf.low, group = term)) +
        geom_hline(yintercept = 0, linetype = "dotted", color = "gray70") +
        geom_text(aes(label = round(estimate,2), y = estimate),  position = position_dodge(width = 0.6), vjust = -0.7) +
        geom_pointrange( position = position_dodge(width = 0.6), size = 1) +
        xlab("Model") +
        ylab(paste("Estimate of effect on", outcome)) +
        coord_flip()
      print(plot)
      print_summary(by_method)

      cat(pander(anova(add_main, by_method)))

    }, error = function(e) { cat_message(e, "danger") })

    invisible(obj)
}

Test a moderator

test_moderator = function (obj, moderator, diary, multiline = F, xlevels = 3) {
  tryCatch({

    add_main = update.formula(formula(obj), new = as.formula(paste0(". ~ . + ", moderator, " * included "))) # reorder so that the triptych looks nice
    add_mod_formula = update.formula(update.formula(formula(obj), new = . ~ . - included * fertile), new = as.formula(paste0(". ~ . + ", moderator, " * included * fertile"))) # reorder so that the triptych looks nice

    update(obj, formula = add_main) -> with_main
    update(obj, formula = add_mod_formula) -> with_mod
    cat(pander(anova(with_main, with_mod)))
    plot_triptych(with_mod, x.var = "fertile", multiline = multiline, xlevels = xlevels)
    print_summary(with_mod)
  }, error = function(e) { cat_message(e, "danger") })

  invisible(obj)
}

Diagnostics plots

print_diagnostics = function(obj) {
  print(plot(obj))
  cat("\n\n\n")
  qqnorm(resid(obj))
  cat("\n\n\n")
  sjp.lmer(obj, type = "fe.cor")
  cat("\n\n\n")
  sjp.lmer(obj, type = "re.qq")
  cat("\n\n\n")
  invisible(obj)
}

Plot outcome distribution

plot_outcome = function(obj, diary) {
  outcome = obj@frame[, 1]
  outcome_name = names(obj@frame)[1]
  label = attributes(diary[, outcome_name][[1]])$label
  if (is.null(label)) label = outcome_name
  qplot(outcome) + xlab(label)
}

slightly custom marginal_effects

marginal_effects_pass = function(obj, ...) {
  marg = plot(marginal_effects(obj, ...), do_plot = FALSE)
  marg$`fertile:included` = marg$`fertile:included` + scale_color_manual("Contraception status", values = c("horm_contra"="black","cycling"= "red"), labels = c("horm_contra"="hormonally\ncontracepting","cycling"="cycling"), guide = F) + scale_fill_manual("Contraception status", values = c("horm_contra"="black","cycling"= "red"), labels = c("horm_contra"="hormonally\ncontracepting","cycling"="cycling"), guide = F) + facet_wrap(~ included)
  for(i in seq_along(marg)) {
    print(marg[[i]])
  }
  invisible(obj)
}

reporting tools

helper function for magrittr pipes, pass and print

pap = function(pass, to_print = NULL, side_effect = NULL) {
  if( !is.null(to_print)) {
    if (is.function(to_print)) {
      print(to_print(pass))
    } else {
      print(to_print)
    }
  }
  side_effect
  invisible(pass)
}

drop_random_effects_brms = function(mod) {
  rand = stringr::str_detect(mod$fit@sim$fnames_oi, "^r_")

  mod$fit@sim$fnames_oi = mod$fit@sim$fnames_oi[ !rand ]
  for (i in 1:length(mod$fit@sim$samples)) {
    mod$fit@sim$samples[[i]] = mod$fit@sim$samples[[i]][ !rand ]
  }
  mod$fit@sim$dims_oi = mod$fit@sim$dims_oi[ !stringr::str_detect(names(mod$fit@sim$dims_oi), "^r_")]
  mod$fit@sim$pars_oi = mod$fit@sim$pars_oi[ !stringr::str_detect(mod$fit@sim$pars_oi, "^r_")]
  mod$fit@sim$n_flatnames = length(mod$fit@sim$samples[[i]])
  mod
}

Get p values from lme4 model

get_p_val = function(obj) {
  summa = summary(obj)
  fixefs = data.frame(summa$coefficients, check.names = F)
  fixefs$term = rownames(fixefs)
  rownames(fixefs) = NULL
  if("Pr(>|z|)" %in% names(fixefs)) {
    fixefs = fixefs %>% rename(p.value = `Pr(>|z|)`) %>% select(term, p.value)
  } else if("Pr(>|t|)" %in% names(fixefs)) {
    fixefs = fixefs %>% rename(p.value = `Pr(>|t|)`) %>% select(term, p.value)
  } else {
    fixefs$p.value = NA_real_
  }
  fixefs
}

2 digit number, no more no less

form = function(x, digits = 2) format(round(x, digits = digits), nsmall = digits, digits = digits)

from shamrt/rapa Report a p-value

Formats and reports a p-value according to APA 6 guidelines. Returns a string. Only accepts values between 0 and 1.

This function is primarily intended for use by other helper functions, so by default p-values are pretty-printed (e.g., “p = .152”) instead of simply returning the correctly rounded p-value (e.g., “.152”).

@param p A number representing a p-value (must be less than 1). @param pretty.print An optional dichotomous indicator for whether to prepend the p-value abbreviation and appropriate sign to the returned number (see details).

@examples apa.p.value(.15201) apa.p.value(.152e-2) apa.p.value(.00001)

@include errors.R @export

apa.p.value <- function(p, pretty.print = TRUE) {
  # error handling
  if (is.na(p)) {
    return(NA_real_)
  }
  else if (!is.numeric(p)) {
    stop(.type.error('p', 'numeric'))
  }
  if (p > 1) {
    stop(.value.error('p', "less than or equal to 1"))
  } else if (p < 0) {
    stop(.value.error('p', "greater than or equal to 0"))
  }

  # calculate displayed p-value
  if (p >= .001) {
    p.drop.trailing.0 <- form(p, 3)
    p.drop.leading.0 <- sub("0\\.(\\d+)", ".\\1",
                            as.character(p.drop.trailing.0))
    p.clean <- p.drop.leading.0

    p.pp <- paste("_p_", "=", p.clean)
  } else {
    p <- ".001"

    p.pp <- paste("_p_", "<", p)
  }

  return(ifelse(pretty.print, p.pp, p))
}
pvalues = function(x) {
  str_sub(sapply(x, FUN = apa.p.value), 5)
}

pander.tbl_df = pander.tbl_dt = function(x,...) { pander::pander(data.frame(x, check.names = F), ...) }


effect_size = function(model) {
  eff = diff(range(model@frame$fertile, na.rm = T))
  coefs = broom::tidy(model)
  fert = coefs[coefs$term == "fertile", ]
  sd = sqrt(sum(coefs[coefs$term %starts_with% "sd_Observation", "estimate"]^2 ))
  form(fert$estimate*3/2* eff / sd)
}

Spin R files

R scripts can be documented in markdown using Roxygen comments, as demonstrated here This function turns all R files (that don’t have an Rmd file of the same name and that don’t start with an underscore _) into HTML pages

spin_R_files_to_site_html = function() {
  library(knitr)
  all_Rs = c(list.files(pattern = "^[^_].+\\.R$"), ".Rprofile")
  component_Rmds = list.files(pattern = "^_.+\\.Rmd$")
  temporary_Rmds = c()
  for (i in seq_along(all_Rs)) {
    if (all_Rs[i] == ".Rprofile") {
      Rmd_file = ".Rprofile.Rmd"
    } else {
      Rmd_file = paste0(all_Rs[i], "md")
    }
    if (!file.exists(Rmd_file)) {
      next_document = length(temporary_Rmds) + 1
      temporary_Rmds[next_document] = spin(all_Rs[i], knit = FALSE, envir = new.env(), format = "Rmd")
      prepended_yaml = paste0(c("---
output:
  formr::markdown_custom_options:
    code_folding: 'show'
---

", readLines(temporary_Rmds[next_document])), collapse = "\n")
cat(prepended_yaml, file = temporary_Rmds[next_document])
    }
  }
  components_and_scripts = c(temporary_Rmds, component_Rmds)
  for (i in seq_along(components_and_scripts)) {
    opts_chunk$set(eval = FALSE, cache = FALSE)
    print(components_and_scripts[i])
    # if we call render_site on the .R file directly it adds a header I don't like
    rmarkdown::render_site(components_and_scripts[i], quiet = TRUE)
  }
  opts_chunk$set(eval = TRUE, cache = TRUE)
  unlink(temporary_Rmds)
}