Researcher degree of freedom simulation

bsub -a openmpi -q mpi -W 48:00 -n 150,200 mpirun.lsf R --slave -f /usr/users/rarslan/relationship_dynamics/1_researcher_df.R thanks to bbolker http://rpubs.com/bbolker/lme4sims & http://cran.r-project.org/web/packages/foreach/vignettes/nested.pdf

options(stringsAsFactors = FALSE)
library = function (...) suppressMessages(base::library(...))
library(dplyr)

set up MPI

setwd("/usr/users/rarslan/relationship_dynamics/")
library(doMPI)
cl <- startMPIcluster(verbose = T)
registerDoMPI(cl)
source("0_simulation_functions.R")

Parameters

sample_people = c(25, 50, 70, 100)
nr_of_days = c(2, 30, 90)
dayspans = list("1:38" = 1:38, "18,5" = c(5, 18))
fertility_effects = c(0)
trait_effects = c(0, 0.5, 1)
miss_windows = c(0, 2)
researcher_degrees = list(rdf_none, rdf_optional_stopping, rdf_covariate, rdf_outcome, rdf_predictor, rdf_outcome_covariate, rdf_predictor_outcome_covariate, rdf_stopping_predictor_outcome_covariate, rdf_stopping_predictor)
names(researcher_degrees) = list('rdf_none ','rdf_optional_stopping', 'rdf_covariate', 'rdf_outcome', 'rdf_predictor', 'rdf_outcome_covariate', 'rdf_predictor_outcome_covariate', 'rdf_stopping_predictor_outcome_covariate', 'rdf_stopping_predictor')
nrep <- 1:1000
res0 <- rdf_stopping_predictor_outcome_covariate(simulate_fertility_effect())

Time prediction, number of nodes

# time = system.time({ for (i in 1:10) { rdf_stopping_predictor_outcome_covariate(simulate_fertility_effect()) }})
# length(sample_people) *
#   length(nr_of_days) *
#   length(dayspans) *
#   length(fertility_effects) *# nodes
#   length(trait_effects) *
#   length(miss_windows)
#
#   length(researcher_degrees) * 10 * time[3]  / 60 / 60 # hours per node

Simulation

resdf = foreach(i = seq_along(sample_people), .combine = "bind_rows") %:%
  foreach(j = seq_along(nr_of_days), .combine = "bind_rows") %:%
  foreach(k = seq_along(dayspans), .combine = "bind_rows") %:%
  foreach(l = seq_along(fertility_effects), .combine = "bind_rows") %:%
    foreach(m = seq_along(trait_effects), .combine = "bind_rows") %:%
      foreach(n = seq_along(miss_windows), .combine = "bind_rows", .errorhandling = "remove") %dopar% {
        source("0_simulation_functions.R")

        #' ## Parameters
        sample_people = c(25, 50, 70, 100)
        nr_of_days = c(2, 30, 90)
        dayspans = list("1:38" = 1:38, "18,5" = c(5, 18))
        fertility_effects = c(0)
        trait_effects = c(0, 0.5, 1)
        miss_windows = c(0, 2)
        researcher_degrees = list(rdf_none, rdf_optional_stopping, rdf_covariate, rdf_outcome, rdf_predictor, rdf_outcome_covariate, rdf_predictor_outcome_covariate, rdf_stopping_predictor_outcome_covariate, rdf_stopping_predictor)
        names(researcher_degrees) = list('rdf_none ','rdf_optional_stopping', 'rdf_covariate', 'rdf_outcome', 'rdf_predictor', 'rdf_outcome_covariate', 'rdf_predictor_outcome_covariate', 'rdf_stopping_predictor_outcome_covariate', 'rdf_stopping_predictor')
        nrep <- 1:1000

        foreach(repetition = nrep, .combine = "bind_rows", .maxcombine = 1000, .errorhandling = "remove") %do% {
          cat(i, j, k, l, m, n, "\n")

          simulated_data = simulate_fertility_effect(
            nr_of_people = sample_people[i],
            nr_days = nr_of_days[j],
            dayspan = dayspans[[k]],
            miss_window = miss_windows[n],
            effects = list(
              trait = trait_effects[m],
              fertility_effect = fertility_effects[l],
              noise = 0.5
            )
          )

          foreach(rdfi = seq_along(researcher_degrees), .combine = "bind_rows", .maxcombine = 1000, .errorhandling = "remove") %do% {
            rdf = researcher_degrees[[rdfi]]

            ## if you want to see progress in the output file
            result = data.frame(
              nr_of_people = sample_people[i],
              nr_days = nr_of_days[j],
              dayspan = names(dayspans)[k],
              miss_window = miss_windows[n],
              trait_effect = trait_effects[m],
              fertility_effect = fertility_effects[l],
              rdf = names(researcher_degrees)[rdfi],
              estimate = NA_real_,
              std.error = NA_real_,
              statistic = NA_real_,
              p.value = NA_real_,

              usable_days = NA_real_,
              covariate_used = NA_real_,
              outcome = NA_real_,
              predictor = NA_character_,
              stopping.n = NA_real_
            )
            try({
              fit_results = rdf(simulated_data)
              result[, names(fit_results)] = fit_results
            }, silent = T)
            result
    }
  }
}
saveRDS(resdf,file = "rdf_sims.rds")


closeCluster(cl)
mpi.quit()