Mis-allocated scrutiny in science: a quick simulation

Below I document my simulation code underlying an upcoming blog post at The 100% CI.


# load packages
library(tidyverse)
library(knitr)
library(kableExtra)

Parameters


n_papers <- 10000
n_journals <- 15
n_revisions <- 3
submission_limit <- 10

Spawn papers and journals


set.seed(1610)
journals <- tibble(
  journal = 1:n_journals,
  threshold = rnorm(n_journals, sd = 1.8),
  fame = threshold + 0.3 * rnorm(n_journals),
  submitted = 0,
  accepted = 0,
  reviewer_count = case_when(
    fame > 2 ~ 5,
    fame > 1 ~ 4,
    fame > 0 ~ 3,
    fame > -1 ~ 2,
    TRUE ~ 1
  )
) %>% arrange(desc(fame)) %>%
  mutate(journal = 1:n_journals) %>% 
  as.data.frame()

papers <- tibble(
  paper = 1:n_papers,
  fitness = rnorm(n_papers, sd = 1.5),
  reviews = 0,
  revisions = 0,
  submissions = 0,
  submission_sequence = "",
  published = FALSE,
  journal = NA_real_,
  first_choice = NA_real_,
  authors_own_assessment = NA_real_,
  editors_assessment = NA_real_,
  most_recent_assessment = NA_real_
) %>% 
  as.data.frame()

kable(table(journals$reviewer_count), 
      caption = "How many journals recruit how many reviewers on average?")
Table 1: How many journals recruit how many reviewers on average?
Var1 Freq
1 2
2 4
3 3
4 3
5 3

decisions <- tibble(
  did_not_submit = 0,
  desk_rejection = 0,
  reject_after_reviews = 0,
  revise_and_resubmit = 0,
  accepted = 0
) %>% 
  as.data.frame()


get_reviews <- function(fitness, n = 1) {
  error <- 1/(sqrt(1+n))
  sqrt((1 - error)) * fitness + (sqrt(error)) * rnorm(length(fitness))
}

Main simulation loop


# library(profvis)
# profvis({
for(p in 1:n_papers) {
  papers[p, "authors_own_assessment"] <- 1 + get_reviews(papers[p, "fitness"], 1)
  # submission loop
  for(j in 1:n_journals) {
    if(papers[p, "authors_own_assessment"] <
       (journals[j, "fame"] - 1.8)) {
      decisions$did_not_submit = decisions$did_not_submit + 1
      # WOULD NOT EVEN SUBMIT
    } else {
      # SUBMIT
      papers[p, "submissions"] <- papers[p, "submissions"] + 1
      papers[p, "submission_sequence"] <- paste0(
        papers[p, "submission_sequence"], j, ", ")
      journals[j, "submitted"] = journals[j, "submitted"] + 1
      if(is.na(papers[p, "first_choice"])) {
        papers[p, "first_choice"] <- j
      }

      # EDITOR REVIEWS
      papers[p, "editors_assessment"] <- get_reviews(papers[p, "fitness"], 1)
      papers[p, "reviews"] <- papers[p, "reviews"] + 1

      if(papers[p, "editors_assessment"] <
         (journals[j, "threshold"] - 1.5)) {
        # DECISION: DESK REJECTION
        decisions$desk_rejection = decisions$desk_rejection + 1
        papers[p, "fitness"] <- papers[p, "fitness"] + 
          0.05/papers[p, "submissions"]
        papers[p, "revisions"] <- papers[p, "revisions"] + 1
      } else {
        # SENT FOR REVIEW
        # revision loop
        for(r in 1:n_revisions) {
          papers[p, "most_recent_assessment"] <-
            get_reviews(papers[p, "fitness"], journals[j, "reviewer_count"])

          if(papers[p, "most_recent_assessment"] <
                    (journals[j, "threshold"] - 0.5)) {
            # DECISION: REJECT AFTER REVIEWS
            decisions$reject_after_reviews = decisions$reject_after_reviews + 1
            papers[p, "reviews"] <- papers[p, "reviews"] + journals[j, "reviewer_count"]
            papers[p, "revisions"] <- papers[p, "revisions"] + 1
            # diminishing returns
            papers[p, "fitness"] <- papers[p, "fitness"] + 
              0.1/papers[p, "submissions"]
            break
          } else if(papers[p, "most_recent_assessment"] <
                    (journals[j, "threshold"])) {
            # DECISION: MAJOR REVISION/R&R
            decisions$revise_and_resubmit = decisions$revise_and_resubmit + 1
            papers[p, "reviews"] <- papers[p, "reviews"] + journals[j, "reviewer_count"]
            papers[p, "revisions"] <- papers[p, "revisions"] + 1
            papers[p, "fitness"] <- papers[p, "fitness"] + 
              0.3/papers[p, "submissions"]
          } else if(papers[p, "most_recent_assessment"] >=
             journals[j, "threshold"]) {
            # DECISION: ACCEPTED/MINOR REVISION
            decisions$accepted = decisions$accepted + 1
            papers[p, "reviews"] <- papers[p, "reviews"] + journals[j, "reviewer_count"]
            papers[p, "published"] <- TRUE
            journals[j, "accepted"] = journals[j, "accepted"] + 1
            papers[p, "journal"] <- journals[j, "journal"]
            break # acceptance
          }
        } # end revision loop
      }
    }
    if (papers[p, "published"]) {
      break # done
    } else if (papers[p, "submissions"] >= submission_limit) {
      break # give up
    }
  } # end journal loop
} # end paper loop
# })

Inspect results


theme_set(theme_minimal())
options(digits = 2)
kable(decisions, 
      caption = "How common are certain editorial decisions?")
Table 2: How common are certain editorial decisions?
did_not_submit desk_rejection reject_after_reviews revise_and_resubmit accepted
17244 31843 26577 7195 8729

kable(table(papers$submissions), 
      caption = "How often do papers get submitted?")
Table 2: How often do papers get submitted?
Var1 Freq
1 230
2 288
3 695
4 715
5 1642
6 887
7 1408
8 887
9 1261
10 1987

kable(table(papers$published, exclude=NULL), 
      caption = "How many do not end up published, i.e. authors give up after 8 tries?")
Table 2: How many do not end up published, i.e. authors give up after 8 tries?
Var1 Freq
FALSE 1271
TRUE 8729

papers_in_journals <- papers %>% left_join(journals)
papers_in_journals %>% select(fitness, fame, submissions, reviews) %>% 
  cor(use = 'pairwise') %>% round(2) %>% 
  kable(caption = "How do paper fitness, journal fame, paper's number of submissions and paper's accumulated number of reviews intercorrelate?")
Table 2: How do paper fitness, journal fame, paper’s number of submissions and paper’s accumulated number of reviews intercorrelate?
fitness fame submissions reviews
fitness 1.00 0.89 -0.58 0.02
fame 0.89 1.00 -0.59 -0.17
submissions -0.58 -0.59 1.00 0.55
reviews 0.02 -0.17 0.55 1.00

ggplot(papers_in_journals, aes(journal, reviews)) +
  geom_jitter(alpha = 0.2) +
  geom_pointrange(stat = 'summary', color = "blue") +
  ggtitle("Mis-allocated scrutiny", subtitle = "Simulated data") +
  xlab("Journal rank") +
  ylab("Accumulated reviews")


journals %>% mutate(acceptance_rate = round(accepted/submitted,2)) %>% 
  kable(caption = "How many papers are submitted to each journal and how many are accepted?",
        digits = 1)
Table 2: How many papers are submitted to each journal and how many are accepted?
journal threshold fame submitted accepted reviewer_count acceptance_rate
1 2.9 2.8 4891 132 5 0.0
2 2.8 2.7 5358 112 5 0.0
3 2.2 2.1 7112 407 5 0.1
4 2.0 1.6 7856 329 4 0.0
5 1.2 1.5 7651 1459 4 0.2
6 1.4 1.2 6723 377 4 0.1
7 0.8 0.9 6704 1117 3 0.2
8 0.8 0.7 5681 600 3 0.1
9 0.1 0.4 5260 1620 3 0.3
10 0.4 -0.1 3772 512 2 0.1
11 0.3 -0.2 2653 309 2 0.1
12 -0.5 -0.3 2205 835 2 0.4
13 -0.9 -0.6 1067 426 2 0.4
14 -2.6 -2.1 498 474 1 0.9
15 -2.2 -2.2 23 20 1 0.9

Corrections

If you see mistakes or want to suggest changes, please create an issue on the source repository.

Reuse

Text and figures are licensed under Creative Commons Attribution CC BY 4.0. Source code is available at https://github.com/rubenarslan/rubenarslan.github.io, unless otherwise noted. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".