this script runs a model on our scientific computing cluster
library(dplyr); library(brms)
setwd("/usr/users/rarslan/updated_data/")
args = commandArgs()
dataset = args[6]
uptobyear = args[7]
load(paste0(dataset, ".rdata"))
model_data = get(paste0(dataset, ".1")) %>% arrange(idParents, byear)
model_data$first_born_adult_male = ave(ifelse(model_data$male == 1 & model_data$surviveR == 1,1,0), model_data$idParents,
FUN = function(x) {
x[is.na(x)] = 0
y = cumsum(x)
ifelse(y > 0 & y == x,1,0)
})
model_data$last_born_adult_male = ave(ifelse(model_data$male == 1 & model_data$surviveR == 1,1,0), model_data$idParents,
FUN = function(x) {
x[is.na(x)] = 0
y = max(cumsum(x)) - cumsum(x) + 1
ifelse(y > 0 & y == x,1,0)
})
model_data = model_data %>% tbl_df %>%
filter(byear < uptobyear) %>%
select(children, birth.cohort, male, maternalage.factor, paternalage.mean, paternalage, paternal_loss, maternal_loss, older_siblings, nr.siblings, last_born, idParents, first_born_adult_male, last_born_adult_male) %>%
na.omit()
model_formula = children ~ paternalage + birth.cohort + first_born_adult_male + last_born_adult_male + male + maternalage.factor + paternalage.mean + paternal_loss + maternal_loss + older_siblings + nr.siblings + last_born + (1 | idParents)
if (dataset == "swed_subset_children") {
model_prior = c(set_prior("normal(0,5)", class = "b"),
set_prior("student_t(3, 0, 5)", class = "sd"))
model_family = poisson()
} else {
model_prior = c(set_prior("normal(0,5)", class = "b"),
set_prior("normal(0,5)", class = "b", nlpar = "hu"),
set_prior("student_t(3, 0, 5)", class = "sd"))
model_formula_hu = update(model_formula, hu ~ . )
model_formula = bf(model_formula, model_formula_hu)
model_family = hurdle_poisson()
}
model = brm( model_formula,
prior = model_prior,
family = model_family, data = model_data,
chains = 6, iter = 1500, warmup = 300, cores = 6, ranef = FALSE)
summary(model)
saveRDS(model,file = paste0("coefs/", dataset, "/r8_adjust_for_first_born_adult.rds"))