APA PsycTests
# A tibble: 1 × 2
  `n_distinct(Name)` `n()`
               <int> <int>
1               1237  3043
records_wide$first_construct <- str_trim(str_replace_all(str_to_lower(records_wide$first_construct), "[:space:]+", " "))
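# The line above lower-cases construct names, collapses runs of whitespace and
# trims them -- presumably so that the same construct written with different
# casing or spacing is counted once in the distinct first_construct tallies below.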
Rows: 309223 Columns: 5
── Column specification ──────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): Name, Journal
dbl (3): Hit Count, Year, number of search results
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
`summarise()` has grouped output by 'Name'. You can override using the `.groups` argument.
overview <- readr::read_tsv("../sober_rubric/raw_data/20230617_ebsco_scrape_clean_overview_table_1.tsv")
Rows: 71692 Columns: 4
── Column specification ──────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): DOI
dbl (3): first_pub_year, last_pub_year, Hits
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
byyear <- readr::read_tsv("../sober_rubric/raw_data/20230617_ebsco_scrape_table_years_1.tsv")
Rows: 218142 Columns: 3
── Column specification ──────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): DOI
dbl (2): Year, Hits
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
n_distinct(byyear$DOI)
[1] 31145
[1] 40574
nrow(overview)
[1] 71692
[1] 31118
[1] 13280
anti_join: added no columns
> rows only in x 204,862
> rows only in y ( 0)
> matched rows ( 13,280)
> =========
> rows total 204,862
left_join: added 3 columns (first_pub_year, last_pub_year, total_hits)
> rows only in x 1
> rows only in y ( 40,548)
> matched rows 218,141
> =========
> rows total 218,142
n_distinct(all$DOI)
[1] 31145
[1] 0
`summarise()` has grouped output by 'total_hits'. You can override using the
`.groups` argument.
[1] 0
# all %>% group_by(total_hits, DOI) %>% summarise(hits_by_year = sum(usage_count, na.rm = T)) %>% filter(hits_by_year < total_hits) %>% select(DOI, everything()) %>% mutate(diff = hits_by_year - total_hits) %>% View()
all %>%
  group_by(total_hits, DOI) %>%
  summarise(hits_by_year = sum(Hits, na.rm = T)) %>%
  filter(hits_by_year == total_hits) %>%
  nrow()
`summarise()` has grouped output by 'total_hits'. You can override using the
`.groups` argument.
[1] 31118
# A tibble: 27 × 2
DOI hits_by_year
<chr> <dbl>
1 10.1037/t00747-000 0
2 10.1037/t00875-000 0
3 10.1037/t00878-000 0
4 10.1037/t00879-000 0
5 10.1037/t02477-000 0
6 10.1037/t02488-000 0
7 10.1037/t04670-000 0
8 10.1037/t04771-000 0
9 10.1037/t04776-000 0
10 10.1037/t04779-000 0
# ℹ 17 more rows
`summarise()` has grouped output by 'total_hits'. You can override using the
`.groups` argument.
.
0
27
psyctests_info <- records_wide %>%
select(DOI, TestYear, Name, first_construct,
original_test_DOI, original_DOI_combined,
test_type, ConstructList, subdiscipline_1, subdiscipline_2,
classification_1, classification_2, instrument_type_broad,
InstrumentType,
number_of_factors_subscales, Name_base) %>%
distinct() %>%
inner_join(all, by = c("DOI" = "DOI"), multiple = "all") %>%
rename(usage_count = "Hits")
inner_join: added 5 columns (Year, Hits, first_pub_year, last_pub_year, total_hits)
> rows only in x ( 40,548)
> rows only in y ( 1)
> matched rows 218,141 (includes duplicates)
> =========
> rows total 218,141
write_rds(psyctests_info, "../sober_rubric/raw_data/psyctests_info.rds")
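# psyctests_info: test-level metadata from records_wide joined to the yearly
# PsycInfo usage counts -- roughly one row per test DOI and usage Year, with
# Hits renamed to usage_count plus first_pub_year, last_pub_year and total_hits.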
test_data <- records_wide %>%
filter(TestYear <= 2022) %>%
rowwise() %>%
mutate(Methodology = length(MethodologyList) >0) %>%
mutate(AdministrationMethod = length(AdministrationMethodList) >0) %>%
mutate(PopulationGroup = length(PopulationGroupList) >0) %>%
mutate(AgeGroup = length(AgeGroupList) >0) %>%
group_by(TestYear) %>%
summarise(Reliability = mean(Reliability!="No reliability indicated."),
FactorAnalysis = mean(FactorAnalysis!="No factor analysis indicated."),
# Unidimensional = mean(FactorAnalysis=="This is a unidimensional measure."),
FactorsAndSubscales = mean(!is.na(FactorsAndSubscales)),
Validity = mean(Validity!="No validity indicated."),
Format = mean(!is.na(Format)),
# Fee = mean(Fee == "Yes"),
Methodology = mean(Methodology),
AdministrationMethod = mean(AdministrationMethod),
# AgeGroup = mean(AgeGroup),
# PopulationGroup = mean(PopulationGroup),
TestItems = mean(TestItemsAvailable == "Yes")) %>%
pivot_longer(-TestYear)
pivot_longer: reorganized (Reliability, FactorAnalysis, FactorsAndSubscales, Validity, Format, …) into (name, value) [was 117x9, now 936x3]
test_data %>%
ggplot(aes(TestYear, value, color = name)) +
geom_vline(xintercept = 2016, linetype = 'dashed') +
geom_line() +
scale_x_continuous("Publication year in APA PsycTests",
limits = c(1993, 2030),
breaks = seq(1993, 2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022"),
expand = expansion(add = c(0, 1.2))) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
geom_text_repel(
data = test_data %>% drop_na() %>% group_by(name) %>% filter(TestYear == max(TestYear, na.rm = T)),
aes(label = name),
segment.color = 'grey',
xlim = c(2022, 2033),
box.padding = 0.1,
# point.padding = 0.6,
nudge_x = 1.2,
# nudge_y = 0,
force = 0.5,
hjust = 0,
direction="y",
na.rm = TRUE
) +
ylab("PsycTests contains information about...") +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
drop_na: no rows removed
Warning: Removed 696 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/changed_standards_2016.pdf", width = 8, height = 4)
Warning: Removed 696 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/changed_standards_2016.png", width = 8, height = 4)
Warning: Removed 696 rows containing missing values or values outside the scale range
(`geom_line()`).
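# The sparse year labels above (1993, 1998, ..., 2018, 2022 with blanks in
# between) are typed out by hand and reused in many plots below. A small helper
# could build the same vector; this is only a sketch, year_labels() is not part
# of the original script:
year_labels <- function(from = 1993, to = 2022, step = 5) {
  years <- seq(from, to, by = 1)
  # label the first year of every step and the final year, blank out the rest
  ifelse(years %in% c(seq(from, to, by = step), to), years, "")
}
# e.g. scale_x_continuous(breaks = 1993:2022, labels = year_labels())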
records_wide %>%
group_by(TestYear) %>%
summarise(number_of_test_items = mean(number_of_test_items, na.rm = T)) %>%
filter(TestYear >= 1993, TestYear <= 2022) %>%
ggplot(aes(TestYear, number_of_test_items)) +
geom_vline(xintercept = 2016, linetype = 'dashed') +
geom_line() +
scale_x_continuous("Publication year in APA PsycTests",
limits = c(1993, NA),
breaks = seq(1993, 2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022")) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none") +
xlab("Publication year in APA PsycTests") +
ylab("Number of items in measure")
records_wide %>%
filter(TestYear >= 1993, TestYear <= 2022) %>%
ggplot(aes(TestYear, number_of_factors_subscales)) +
geom_vline(xintercept = 2016, linetype = 'dashed') +
geom_pointrange(stat = 'summary', fun.data = 'mean_se') +
xlim(1993, 2022) +
xlab("Publication year in APA PsycTests") +
ylab("Number of factors/subscales")
Warning: Removed 54780 rows containing non-finite outside the scale range
(`stat_summary()`).
records_wide %>%
group_by(InstrumentType, TestYear) %>%
summarise(tests = n()) %>%
group_by(TestYear) %>%
mutate(tests = tests/sum(tests, na.rm = T)) %>%
arrange(TestYear) %>%
filter(TestYear >= 1993, TestYear <= 2022) %>%
# mutate(tests = cumsum(tests)) %>%
ggplot(aes(TestYear, tests, color = InstrumentType)) +
geom_vline(xintercept = 2016, linetype = 'dashed') +
scale_color_viridis_d() +
geom_line() +
xlim(1993, 2022) +
xlab("Publication year in APA PsycTests") +
ylab("Proportion of instrument type")
`summarise()` has grouped output by 'InstrumentType'. You can override using the
`.groups` argument.
Warning: Removed 29 rows containing missing values or values outside the scale range
(`geom_line()`).
records_wide %>%
# filter(instrument_type_broad !=)
group_by(instrument_type_broad, TestYear) %>%
summarise(tests = n()) %>%
group_by(TestYear) %>%
mutate(tests = tests/sum(tests, na.rm = T)) %>%
arrange(TestYear) %>%
filter(TestYear >= 1993, TestYear <= 2022) %>%
# mutate(tests = cumsum(tests)) %>%
ggplot(aes(TestYear, tests, color = instrument_type_broad)) +
geom_vline(xintercept = 2016, linetype = 'dashed') +
geom_line() +
xlim(1993, 2022) +
xlab("Publication year in APA PsycTests") +
ylab("Proportion of instrument type")
`summarise()` has grouped output by 'instrument_type_broad'. You can override
using the `.groups` argument.
records_wide %>%
filter(instrument_type_broad != "questionnaire") %>%
group_by(InstrumentType, TestYear) %>%
summarise(tests = n()) %>%
group_by(TestYear) %>%
mutate(tests = tests/sum(tests, na.rm = T)) %>%
arrange(TestYear) %>%
filter(TestYear >= 1993, TestYear <= 2022) %>%
# mutate(tests = cumsum(tests)) %>%
ggplot(aes(TestYear, tests, color = InstrumentType)) +
geom_vline(xintercept = 2016, linetype = 'dashed') +
geom_line() +
xlim(1993, 2022) +
xlab("Publication year in APA PsycTests") +
ylab("Proportion of instrument type") +
ggtitle("Without questionnaires")
`summarise()` has grouped output by 'InstrumentType'. You can override using the
`.groups` argument.
psyctests_info %>% group_by(DOI, TestYear) %>%
summarise(used = sum(usage_count, na.rm = T)) %>%
full_join(records_wide %>% select(DOI, TestYear)) %>%
mutate(used = coalesce(used, 0)) %>%
filter(TestYear >= 1993, TestYear <= 2022) %>%
group_by(TestYear) %>%
summarise(never_reused = mean(used == 0),
used_once = mean(used == 1),
used_twice = mean(used == 2),
used_thrice = mean(used == 3),
used_more_10 = mean(used > 9)) %>%
pivot_longer(-TestYear) %>%
ggplot(aes(TestYear, value, color = name)) +
geom_vline(xintercept = 2016, linetype = 'dashed') +
geom_line() +
xlim(1993, 2022) +
xlab("Publication year in APA PsycTests") +
ylab("Frequency")
`summarise()` has grouped output by 'DOI'. You can override using the `.groups`
argument.
Joining with `by = join_by(DOI, TestYear)`
full_join: added no columns
> rows only in x 0
> rows only in y 40,548
> matched rows 31,144
> ========
> rows total 71,692
pivot_longer: reorganized (never_reused, used_once, used_twice, used_thrice,
used_more_10) into (name, value) [was 30x6, now 150x3]
count_all <- records_wide %>%
group_by(TestYear) %>%
summarise(tests = n()) %>%
arrange(TestYear)
count_orig <- records_wide %>%
filter(test_type == "Original") %>%
group_by(TestYear) %>%
summarise(tests = n()) %>%
arrange(TestYear)
count_base <- records_wide %>%
arrange(TestYear) %>%
distinct(Name_base, .keep_all = T) %>%
group_by(TestYear) %>%
summarise(tests = n_distinct(DOI))
count_construct <- records_wide %>%
arrange(TestYear) %>%
distinct(first_construct, .keep_all = T) %>%
group_by(TestYear) %>%
summarise(tests = n_distinct(first_construct)) %>%
arrange(TestYear)
counts <- bind_rows(
"novel constructs" = count_construct,
"novel measures" = count_orig,
"with translations\n and revisions" = count_all,
.id = "origin"
) %>%
rename(Year = TestYear) %>%
filter(Year <= 2022)
ggplot(counts, aes(Year, tests, color = origin)) +
geom_line() +
geom_vline(xintercept = 2016, linetype = 'dashed') +
scale_y_continuous("Number of measures/constructs published") +
scale_x_continuous("Publication year in APA PsycTests",
limits = c(1993, 2030),
breaks = seq(1993, 2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022")) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(data = counts %>% drop_na() %>% group_by(origin) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ", origin)),
segment.curvature = -0.1,
segment.square = TRUE,
lineheight = .9,
# segment.color = 'grey',
box.padding = 0.1,
point.padding = 0.6,
nudge_x = 1.5,
nudge_y = -15,
force = 1,
hjust = 0,
direction="y",
na.rm = F) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
drop_na: no rows removed
Warning: Removed 250 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/counts.pdf", width = 8, height = 4)
Warning: Removed 250 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/counts.png", width = 8, height = 4)
Warning: Removed 250 rows containing missing values or values outside the scale range
(`geom_line()`).
cumsum_all <- records_wide %>%
group_by(TestYear) %>%
summarise(tests = n()) %>%
arrange(TestYear) %>%
mutate(tests = cumsum(tests))
cumsum_orig <- records_wide %>%
filter(test_type == "Original") %>%
group_by(TestYear) %>%
summarise(tests = n()) %>%
arrange(TestYear) %>%
mutate(tests = cumsum(tests))
cumsum_base <- records_wide %>%
arrange(TestYear) %>%
distinct(Name_base, .keep_all = T) %>%
group_by(TestYear) %>%
summarise(tests = n_distinct(DOI)) %>%
mutate(tests = cumsum(tests))
cumsum_construct <- records_wide %>%
arrange(TestYear) %>%
distinct(first_construct, .keep_all = T) %>%
group_by(TestYear) %>%
summarise(tests = n_distinct(DOI)) %>%
arrange(TestYear) %>%
mutate(tests = cumsum(tests))
cumsums <- bind_rows(
"novel constructs" = cumsum_construct,
"novel measures" = cumsum_orig,
"with translations\n and revisions" = cumsum_all,
.id = "origin"
) %>%
rename(Year = TestYear) %>%
filter(Year <= 2022)
ggplot(cumsums, aes(Year, tests, color = origin)) +
geom_line() +
geom_vline(xintercept = 2016, linetype = 'dashed') +
scale_y_continuous("Cumulative number of measures") +
scale_x_continuous("Publication year in APA PsycTests",
limits = c(1993, 2030),
breaks = seq(1993,2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022"),
expand = expansion(add = c(0, 1))) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(data = cumsums %>% drop_na() %>% group_by(origin) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ", origin, "\n (n = ", tests, ")")),
segment.square = TRUE,
lineheight = .9,
segment.color = 'grey',
nudge_x = 1.2,
hjust = 0,
na.rm = TRUE) +
# theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none") +
guides(
x = guide_axis(cap = "both"), # Cap both ends
)
drop_na: no rows removed
Warning: Removed 250 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/cumsums.pdf", width = 8, height = 4)
Warning: Removed 250 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/cumsums.png", width = 8, height = 4)
Warning: Removed 250 rows containing missing values or values outside the scale range
(`geom_line()`).
`summarise()` has grouped output by 'subdiscipline_1'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'subdiscipline_1'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'subdiscipline_1'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'subdiscipline_1'. You can override using the
`.groups` argument.
cumsums <- bind_rows(
"constructs" = cumsum_construct,
"measures" = cumsum_orig,
"with translations & revisions" = cumsum_all,
.id = "origin"
) %>%
rename(Year = TestYear) %>%
filter(Year <= 2022)
cumsums$origin <- factor(cumsums$origin, levels = c("with translations & revisions", "constructs", "measures"))
my_colors <- c("with translations & revisions" = "#7570B3",
"constructs" = "#1B9E77",
"measures" = "#D95F02")
ggplot(cumsums, aes(Year, tests, color = origin)) +
geom_line() +
facet_wrap(~ subdiscipline_1, scales = "free_y", ncol = 2) +
scale_y_continuous("Cumulative number of measures") +
scale_x_continuous("Publication year in APA PsycTests",
limits = c(1993, 2022),
breaks = seq(1993, 2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022")) +
scale_color_manual(values = my_colors, guide = guide_legend(title = NULL)) +
theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = c(0.58, 0.08),
legend.justification = c(0, 0),
legend.box.just = "right",
legend.text = element_text(size = 11))
Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
Warning: Removed 189 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/cumsums_subdiscipline.pdf", width = 8, height = 7)
Warning: Removed 189 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/cumsums_subdiscipline.png", width = 8, height = 7)
Warning: Removed 189 rows containing missing values or values outside the scale range
(`geom_line()`).
drop_na: removed 51,440 rows (24%), 166,701 rows remaining
Joining with `by = join_by(Test)`
full_join: added one column (n)
> rows only in x 25,130
> rows only in y 3,687
> matched rows 18,179
> ========
> rows total 46,996
test_frequency <- test_frequency %>%
group_by(n) %>%
summarise(count = n()) %>%
ungroup() %>%
mutate(percent = count/sum(count))
freq_plot <- ggplot(test_frequency, aes(n, count)) +
geom_bar(width = 0.1, fill = colors["novel"], stat = "identity") +
# facet_wrap(~ subdiscipline_1, scales = "free_y") +
# scale_y_sqrt("Number of measures", breaks = c(0, 100, 400, 1000, 2000, 4000, 6000, 10000), limits = c(0, 11500)) +
scale_y_continuous("Number of measures") +
scale_x_log10("Usages recorded in APA PsycInfo 1993-2022",
breaks = c(0.5, 1, 2, 5, 10, 100, 1000, 25000),
labels = c(0, 1, 2, 5, 10, 100, 1000, 25000)) +
geom_text(aes(label = if_else(n <= 2, sprintf("%.0f%%", percent*100), ""),
x = n, y = count + 700)) +
# scale_x_sqrt(breaks = c(0, 1, 2, 3, 4, 5, 10, 20, 40, 50), labels = c(0, 1, 2, 3, 4, 5, 10, 20, 40, "50+")) +
# geom_text_repel(aes(x = n, label = first_acronym, y = y),
# data =
# test_frequency %>% group_by(subdiscipline_1) %>% filter(row_number() > (n() - 10) ) %>% left_join(records_wide %>% select(Test = DOI, first_acronym)) %>%
# mutate(first_acronym = if_else(first_acronym == "HRSD", "HAM-D",
# first_acronym)) %>%
# mutate(y = 20 + 50*(1+n()-row_number())),
# size = 3.3, force = 5, force_pull = 0, max.time = 1,
# max.overlaps = Inf,
# segment.color = "lightgray",
# segment.curvature = 1,
# hjust = 1,
# nudge_y = 10,
# direction = "y"
# ) +
theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
freq_plot
Warning: `position_stack()` requires non-overlapping x intervals.
ggsave("figures/frequency_across.pdf", width = 8, height = 4)
Warning: `position_stack()` requires non-overlapping x intervals.
ggsave("figures/frequency_across.png", width = 8, height = 4)
Warning: `position_stack()` requires non-overlapping x intervals.
test_frequency <- test_frequency %>%
filter(n >= 1) %>%
mutate(percent = count/sum(count))
freq_plot <- ggplot(test_frequency, aes(n, count)) +
geom_bar(width = 0.1, fill = colors["novel"], stat = "identity") +
# scale_y_sqrt("Number of measures", breaks = c(0, 100, 400, 1000, 2000, 4000, 6000, 10000), limits = c(0, 11500)) +
scale_y_continuous("Number of measures") +
scale_x_log10("Usages recorded in APA PsycInfo 1993-2022",
breaks = c(1, 2, 5, 10, 100, 1000, 25000),
labels = c(1, 2, 5, 10, 100, 1000, 25000)) +
geom_text(aes(label = if_else(n <= 2, sprintf("%.0f%%", percent*100), ""),
x = n, y = count + 700)) +
# scale_x_sqrt(breaks = c(0, 1, 2, 3, 4, 5, 10, 20, 40, 50), labels = c(0, 1, 2, 3, 4, 5, 10, 20, 40, "50+")) +
# geom_text_repel(aes(x = n, label = first_acronym, y = y),
# data =
# test_frequency %>% group_by(subdiscipline_1) %>% filter(row_number() > (n() - 10) ) %>% left_join(records_wide %>% select(Test = DOI, first_acronym)) %>%
# mutate(first_acronym = if_else(first_acronym == "HRSD", "HAM-D",
# first_acronym)) %>%
# mutate(y = 20 + 50*(1+n()-row_number())),
# size = 3.3, force = 5, force_pull = 0, max.time = 1,
# max.overlaps = Inf,
# segment.color = "lightgray",
# segment.curvature = 1,
# hjust = 1,
# nudge_y = 10,
# direction = "y"
# ) +
theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
freq_plot
Warning: `position_stack()` requires non-overlapping x intervals.
ggsave("figures/frequency_across_no0.pdf", width = 8, height = 4)
Warning: `position_stack()` requires non-overlapping x intervals.
ggsave("figures/frequency_across_no0.png", width = 8, height = 4)
Warning: `position_stack()` requires non-overlapping x intervals.
drop_na: removed 51,440 rows (24%), 166,701 rows remaining
`summarise()` has grouped output by 'subdiscipline_1'. You can override using the
`.groups` argument.
Joining with `by = join_by(subdiscipline_1, Test)`
full_join: added one column (n)
> rows only in x 25,199
> rows only in y 4,456
> matched rows 18,110
> ========
> rows total 47,765
`summarise()` has grouped output by 'subdiscipline_1'. You can override using the
`.groups` argument.
freq_plot <- ggplot(test_frequency, aes(n, count)) +
geom_bar(width = 0.1, fill = colors["novel"], stat = "identity") +
facet_wrap(~ subdiscipline_1, scales = "free_y") +
# scale_y_sqrt("Number of measures", breaks = c(0, 100, 400, 1000, 2000, 4000, 6000, 10000), limits = c(0, 11500)) +
scale_y_continuous("Number of measures", expand = expansion(c(0, 0.1))) +
scale_x_log10("Usages recorded in APA PsycInfo 1993-2022",
breaks = c(0.5, 1, 2, 10, 100, 1000, 25000),
labels = c(0, 1, 2, 10, 100, 1000, 25000)) +
geom_text(aes(label = if_else(n <= 2, sprintf("%.0f%%", percent*100), ""),
x = n, y = count ), size = 3, vjust = -0.4, hjust = 0.4) +
# scale_x_sqrt(breaks = c(0, 1, 2, 3, 4, 5, 10, 20, 40, 50), labels = c(0, 1, 2, 3, 4, 5, 10, 20, 40, "50+")) +
# geom_text_repel(aes(x = n, label = first_acronym, y = y),
# data =
# test_frequency %>% group_by(subdiscipline_1) %>% filter(row_number() > (n() - 10) ) %>% left_join(records_wide %>% select(Test = DOI, first_acronym)) %>%
# mutate(first_acronym = if_else(first_acronym == "HRSD", "HAM-D",
# first_acronym)) %>%
# mutate(y = 20 + 50*(1+n()-row_number())),
# size = 3.3, force = 5, force_pull = 0, max.time = 1,
# max.overlaps = Inf,
# segment.color = "lightgray",
# segment.curvature = 1,
# hjust = 1,
# nudge_y = 10,
# direction = "y"
# ) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
freq_plot
Warning: `position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
ggsave("figures/frequency.pdf", width = 8, height = 4)
Warning: `position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
ggsave("figures/frequency.png", width = 8, height = 4)
Warning: `position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
test_frequency <- test_frequency %>%
filter(n >= 1) %>%
group_by(subdiscipline_1) %>%
mutate(percent = count/sum(count))
freq_plot <- ggplot(test_frequency, aes(n, count)) +
geom_bar(width = 0.1, fill = colors["novel"], stat = "identity") +
facet_wrap(~ subdiscipline_1, scales = "free_y") +
# scale_y_sqrt("Number of measures", breaks = c(0, 100, 400, 1000, 2000, 4000, 6000, 10000), limits = c(0, 11500)) +
scale_y_continuous("Number of measures", expand = expansion(c(0, 0.1))) +
scale_x_log10("Usages recorded in APA PsycInfo 1993-2022",
breaks = c(1, 2, 5, 10, 100, 1000, 25000),
labels = c(1, 2, 5, 10, 100, 1000, 25000)) +
geom_text(aes(label = if_else(n <= 2, sprintf("%.0f%%", percent*100), ""),
x = n, y = count ), size = 3, vjust = -0.11, hjust = 0.4) +
# scale_x_sqrt(breaks = c(0, 1, 2, 3, 4, 5, 10, 20, 40, 50), labels = c(0, 1, 2, 3, 4, 5, 10, 20, 40, "50+")) +
# geom_text_repel(aes(x = n, label = first_acronym, y = y),
# data =
# test_frequency %>% group_by(subdiscipline_1) %>% filter(row_number() > (n() - 10) ) %>% left_join(records_wide %>% select(Test = DOI, first_acronym)) %>%
# mutate(first_acronym = if_else(first_acronym == "HRSD", "HAM-D",
# first_acronym)) %>%
# mutate(y = 20 + 50*(1+n()-row_number())),
# size = 3.3, force = 5, force_pull = 0, max.time = 1,
# max.overlaps = Inf,
# segment.color = "lightgray",
# segment.curvature = 1,
# hjust = 1,
# nudge_y = 10,
# direction = "y"
# ) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
freq_plot
Warning: `position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
ggsave("figures/frequency_no0.pdf", width = 8, height = 4)
Warning: `position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
`position_stack()` requires non-overlapping x intervals.
usage_by_year_instrument_type <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
group_by(instrument_type_broad, Year, Test = DOI) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
group_by(instrument_type_broad) %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(instrument_type_broad, n_tests, Year) %>%
summarise(n = sum(n),
diff_tests = n()) %>%
ungroup()
`summarise()` has grouped output by 'instrument_type_broad', 'Year'. You can
override using the `.groups` argument.
`summarise()` has grouped output by 'instrument_type_broad', 'n_tests'. You can
override using the `.groups` argument.
usage_by_year_instrument_type %>%
ggplot(., aes(Year, n, color = instrument_type_broad)) +
geom_line(size = 0.7) +
scale_y_continuous("Times tests were coded in PsycInfo") +
scale_x_continuous(limits = c(1993, 2030), breaks = c(1993, 1998, 2003, 2008, 2013, 2018, 2022)) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(aes(label = gsub("^.*$", " ", instrument_type_broad)), # This will force the correct position of the link's right end.
data = usage_by_year_instrument_type %>% filter(Year == max(Year, na.rm = T)),
segment.curvature = -0.1,
segment.square = TRUE,
segment.color = 'grey',
box.padding = 0.1,
point.padding = 0.6,
nudge_x = 0.15,
nudge_y = 0.05,
force = 0.5,
hjust = 0,
direction="y",
na.rm = TRUE
) +
geom_text_repel(data = usage_by_year_instrument_type %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ",str_replace(instrument_type_broad, " Psychology", ""), " (n=", n_tests, ")")),
segment.alpha = 0, ## This will 'hide' the link
segment.curvature = -0.1,
segment.square = TRUE,
# segment.color = 'grey',
box.padding = 0.1,
point.padding = 0.6,
nudge_x = 0.15,
nudge_y = 0.05,
force = 0.5,
hjust = 0,
direction="y",
na.rm = TRUE)+
theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
Warning: Removed 30 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/usage_by_year_instrument_type.pdf", width = 10, height = 4)
Warning: Removed 30 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/usage_by_year_instrument_type.png", width = 10, height = 4)
Warning: Removed 30 rows containing missing values or values outside the scale range
(`geom_line()`).
usage_by_year_instrument_type <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
group_by(InstrumentType, Year, Test = DOI) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
group_by(InstrumentType) %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(InstrumentType, n_tests, Year) %>%
summarise(n = sum(n),
diff_tests = n()) %>%
ungroup()
`summarise()` has grouped output by 'InstrumentType', 'Year'. You can override
using the `.groups` argument.
`summarise()` has grouped output by 'InstrumentType', 'n_tests'. You can override
using the `.groups` argument.
usage_by_year_instrument_type %>%
ggplot(., aes(Year, n, color = InstrumentType)) +
geom_line(size = 0.7) +
scale_y_continuous("Times tests were coded in PsycInfo") +
scale_x_continuous(limits = c(1993, 2035), breaks = c(1993, 1998, 2003, 2008, 2013, 2018, 2022)) +
scale_color_discrete() +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(aes(label = gsub("^.*$", " ", InstrumentType)), # This will force the correct position of the link's right end.
data = usage_by_year_instrument_type %>% filter(Year == max(Year, na.rm = T)),
segment.curvature = -0.1,
segment.square = TRUE,
segment.color = 'grey',
box.padding = 0.1,
point.padding = 0.6,
nudge_x = 0.15,
nudge_y = 0.05,
force = 0.5,
hjust = 0,
direction="y",
na.rm = TRUE
) +
geom_text_repel(data = usage_by_year_instrument_type %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ",str_replace(InstrumentType, " Psychology", ""), " (n=", n_tests, ")")),
segment.alpha = 0, ## This will 'hide' the link
segment.curvature = -0.1,
segment.square = TRUE,
# segment.color = 'grey',
box.padding = 0.1,
point.padding = 0.6,
nudge_x = 0.15,
nudge_y = 0.05,
force = 0.5,
hjust = 0,
direction="y",
na.rm = TRUE)+
theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
Warning: ggrepel: 11 unlabeled data points (too many overlaps).
Consider increasing max.overlaps
Warning: ggrepel: 12 unlabeled data points (too many overlaps).
Consider increasing max.overlaps
ggsave("figures/usage_by_year_instrument_type_narrow.pdf", width = 14, height = 10)
Warning: ggrepel: 7 unlabeled data points (too many overlaps).
Consider increasing max.overlaps
Warning: ggrepel: 9 unlabeled data points (too many overlaps).
Consider increasing max.overlaps
ggsave("figures/usage_by_year_instrument_type_narrow.png", width = 14, height = 10)
Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider increasing max.overlaps
ggrepel: 9 unlabeled data points (too many overlaps). Consider increasing max.overlaps
byorig_entropy_by_year <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
mutate(Test = if_else(test_type == "Original", DOI, original_test_DOI)) %>%
drop_na(Test) %>%
group_by(Year, Test) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
ungroup() %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(n_tests, Year) %>%
filter(n > 0) %>%
summarise(entropy = entropy(n),
norm_entropy = calc_norm_entropy(n),
n = sum(n),
diff_tests = n()) %>%
ungroup()
drop_na: removed 45,635 rows (22%), 158,814 rows remaining
`summarise()` has grouped output by 'Year'. You can override using the `.groups`
argument.
`summarise()` has grouped output by 'n_tests'. You can override using the
`.groups` argument.
# mutate(n_tests = n_distinct(Test)) stores the overall number of distinct tests
# (computed before grouping by year) so it can be carried through the grouped
# summary and used in the "(n = ...)" plot labels below.
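# entropy() and calc_norm_entropy() are defined elsewhere in the project. A
# rough sketch of what normalized Shannon entropy over positive usage counts
# could look like (an assumption about the implementation, not the original code):
shannon_entropy <- function(counts) {
  p <- counts / sum(counts)
  -sum(p * log(p))  # Shannon entropy (in nats) of the usage shares
}
calc_norm_entropy_sketch <- function(counts) {
  # divide by the maximum attainable entropy, log(number of tests), so that
  # 1 = usage spread perfectly evenly across tests, 0 = all usage on one test
  shannon_entropy(counts) / log(length(counts))
}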
byconstruct_entropy_by_year <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
drop_na(first_construct) %>%
group_by(Year, Test = first_construct) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
ungroup() %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(n_tests, Year) %>%
filter(n > 0) %>%
summarise(entropy = entropy(n),
norm_entropy = calc_norm_entropy(n),
n = sum(n),
diff_tests = n()) %>%
ungroup()
drop_na: no rows removed
`summarise()` has grouped output by 'Year'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'n_tests'. You can override using the `.groups` argument.
all_entropy_by_year <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
drop_na(DOI) %>%
group_by(Year, Test = DOI) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
ungroup() %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(n_tests, Year) %>%
filter(n > 0) %>%
summarise(entropy = entropy(n),
norm_entropy = calc_norm_entropy(n),
n = sum(n),
diff_tests = n()) %>%
ungroup()
drop_na: no rows removed
`summarise()` has grouped output by 'Year'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'n_tests'. You can override using the `.groups` argument.
original_entropy_by_year <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
filter(test_type == "Original") %>%
group_by(Year, Test = DOI) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
ungroup() %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(n_tests, Year) %>%
filter(n > 0) %>%
summarise(entropy = entropy(n),
norm_entropy = calc_norm_entropy(n),
n = sum(n),
diff_tests = n()) %>%
ungroup()
`summarise()` has grouped output by 'Year'. You can override using the `.groups`
argument.
`summarise()` has grouped output by 'n_tests'. You can override using the
`.groups` argument.
entropy_by_year <- bind_rows(# "all tests" = all_entropy_by_year,
"measures" = original_entropy_by_year,
"with translations\n and revisions" = byorig_entropy_by_year,
# "by name base" = bybase_entropy_by_year,
"constructs" = byconstruct_entropy_by_year,
.id = "version")
entropy_by_year %>%
ggplot(., aes(Year, norm_entropy, color = version)) +
geom_line(size = 0.7) +
scale_y_continuous("Normalized Shannon Entropy", limits = c(0, 1), labels = scales::percent) +
# geom_line(aes(y = log(n)), color = 'red') +
scale_x_continuous("Usage year as coded in APA PsycInfo", limits = c(1993, 2030),
breaks = seq(1993, 2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022")) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
geom_vline(xintercept = 2016, linetype = 'dashed') +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(data = entropy_by_year %>% drop_na() %>% group_by(version) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ",version)), # "\n (n = ", n_tests, ")"
segment.curvature = -0.5,
segment.square = TRUE,
segment.color = 'grey',
xlim = c(2023, 2030),
nudge_x = 1.14,
lineheight = .9,
hjust = 0,
direction="y",
na.rm = TRUE) +
theme_minimal(base_size = 13) +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
drop_na: no rows removed
entropy_by_year <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
group_by(subdiscipline_1, Year, Test = DOI) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
group_by(subdiscipline_1) %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(subdiscipline_1, n_tests, Year) %>%
filter(n > 0) %>%
summarise(entropy = entropy(n),
norm_entropy = calc_norm_entropy(n),
n = sum(n),
diff_tests = n()) %>%
ungroup()
`summarise()` has grouped output by 'subdiscipline_1', 'Year'. You can override
using the `.groups` argument.
`summarise()` has grouped output by 'subdiscipline_1', 'n_tests'. You can
override using the `.groups` argument.
entropy_by_year %>%
ggplot(., aes(Year, norm_entropy, color = subdiscipline_1)) +
geom_line(size = 0.7) +
scale_y_continuous("Normalized Shannon Entropy\n(with revisions and translations)", limits = c(0, 1), labels = scales::percent) +
# geom_line(aes(y = log(n)), color = 'red') +
scale_x_continuous("Usage year as coded in APA PsycInfo", limits = c(1993, 2033),
breaks = seq(1993,2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022"),
expand = expansion(add = c(0, 14))) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggtitle(str_c(n_distinct(tests_by_year$Test), " measures tracked in PsycInfo")) +
# geom_text_repel(aes(label = gsub("^.*$", " ", subdiscipline_1)), # This will force the correct position of the link's right end.
# data = entropy_by_year %>% drop_na() %>% group_by(subdiscipline_1) %>% filter(Year == max(Year, na.rm = T)),
# segment.curvature = -0.1,
# segment.square = TRUE,
# segment.color = 'grey',
# box.padding = 0.1,
# point.padding = 0.6,
# max.overlaps = Inf,
# nudge_x = 1.3,
# # nudge_y = 0,
# force = 20,
# hjust = 0,
# direction="y",
# na.rm = TRUE
# ) +
geom_text_repel(data = entropy_by_year %>% drop_na() %>% group_by(subdiscipline_1) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ",str_replace(subdiscipline_1, " Psychology", ""), " (n=", n_tests, ")")),
# segment.alpha = 0, ## This will 'hide' the link
segment.curvature = -0.1,
segment.square = TRUE,
segment.color = 'grey',
box.padding = 0.1,
max.overlaps = Inf,
point.padding = 0.6,
xlim = c(2022, NA),
nudge_x = 2,
# nudge_y = 0.0,
force = 5,
hjust = 0,
direction="y",
na.rm = F) +
theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none") +
coord_cartesian(clip = "off")
drop_na: removed one row (1%), 149 rows remaining
entropy_by_year <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
mutate(Test = if_else(test_type == "Original", DOI, original_test_DOI)) %>%
drop_na(Test) %>%
group_by(subdiscipline_1, Year, Test) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
group_by(subdiscipline_1) %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(subdiscipline_1, n_tests, Year) %>%
filter(n > 0) %>%
summarise(entropy = entropy(n),
norm_entropy = calc_norm_entropy(n),
n = sum(n),
diff_tests = n()) %>%
ungroup()
drop_na: removed 45,635 rows (22%), 158,814 rows remaining
`summarise()` has grouped output by 'subdiscipline_1', 'Year'. You can override
using the `.groups` argument.
`summarise()` has grouped output by 'subdiscipline_1', 'n_tests'. You can
override using the `.groups` argument.
entropy_by_year %>%
ggplot(., aes(Year, norm_entropy, color = subdiscipline_1)) +
geom_line(size = 0.7) +
scale_y_continuous("Normalized Shannon Entropy\n(novel measures)", limits = c(0, 1), labels = scales::percent) +
# geom_line(aes(y = log(n)), color = 'red') +
scale_x_continuous("Usage year as coded in APA PsycInfo", limits = c(1993, 2030),
breaks = seq(1993, 2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022"),
expand = expansion(add = c(0, 17))) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggtitle(str_c(n_distinct(tests_by_year$Test), " measures tracked in PsycInfo")) +
# geom_text_repel(aes(label = gsub("^.*$", " ", subdiscipline_1)), # This will force the correct position of the link's right end.
# data = entropy_by_year %>% drop_na() %>% group_by(subdiscipline_1) %>% filter(Year == max(Year, na.rm = T)),
# segment.curvature = -0.1,
# segment.square = TRUE,
# segment.color = 'grey',
# box.padding = 0.1,
# point.padding = 0.6,
# max.overlaps = Inf,
# nudge_x = 1.3,
# # nudge_y = 0,
# force = 20,
# hjust = 0,
# direction="y",
# na.rm = TRUE
# ) +
geom_text_repel(data = entropy_by_year %>% drop_na() %>% group_by(subdiscipline_1) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ",str_replace(subdiscipline_1, " Psychology", ""), " (n=", n_tests, ")")),
# segment.alpha = 0, ## This will 'hide' the link
segment.curvature = -0.1,
segment.square = TRUE,
segment.color = 'grey',
box.padding = 0.1,
max.overlaps = Inf,
point.padding = 0.6,
xlim = c(2022, NA),
nudge_x = 2,
# nudge_y = 0.0,
force = 5,
hjust = 0,
direction="y",
na.rm = F) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none") +
coord_cartesian(clip = "off")
drop_na: removed one row (1%), 148 rows remaining
entropy_by_year <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
group_by(subdiscipline_1, Year, Test = first_construct) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
group_by(subdiscipline_1) %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(subdiscipline_1, n_tests, Year) %>%
filter(n > 0) %>%
summarise(entropy = entropy(n),
norm_entropy = calc_norm_entropy(n),
n = sum(n),
diff_tests = n()) %>%
ungroup()
`summarise()` has grouped output by 'subdiscipline_1', 'Year'. You can override
using the `.groups` argument.
`summarise()` has grouped output by 'subdiscipline_1', 'n_tests'. You can
override using the `.groups` argument.
entropy_by_year %>%
ggplot(., aes(Year, norm_entropy, color = subdiscipline_1)) +
geom_line(size = 0.7) +
scale_y_continuous("Normalized Shannon Entropy (constructs)", limits = c(0, 1), labels = scales::percent) +
# geom_line(aes(y = log(n)), color = 'red') +
scale_x_continuous("Usage year as coded in APA PsycInfo", limits = c(1993, 2038),
breaks = seq(1993, 2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022"),
expand = expansion(add = c(0, 10))) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggtitle(str_c(n_distinct(tests_by_year$Test), " measures tracked in PsycInfo")) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(data = entropy_by_year %>% drop_na() %>% group_by(subdiscipline_1) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ",str_replace(subdiscipline_1, " Psychology", ""), " (n=", n_tests, ")")),
# segment.alpha = 0, ## This will 'hide' the link
segment.curvature = -0.1,
segment.square = TRUE,
segment.color = 'grey',
box.padding = 0.1,
max.overlaps = Inf,
point.padding = 0.6,
xlim = c(2022, NA),
nudge_x = 2,
# nudge_y = 0.0,
force = 5,
hjust = 0,
direction="y",
na.rm = F) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
drop_na: removed one row (1%), 149 rows remaining
entropy_by_year <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
group_by(instrument_type_broad, Year, Test = DOI) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
group_by(instrument_type_broad) %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(instrument_type_broad, n_tests, Year) %>%
filter(n > 0) %>%
summarise(entropy = entropy(n),
norm_entropy = calc_norm_entropy(n),
n = sum(n),
diff_tests = n()) %>%
ungroup()
`summarise()` has grouped output by 'instrument_type_broad', 'Year'. You can
override using the `.groups` argument.
`summarise()` has grouped output by 'instrument_type_broad', 'n_tests'. You can
override using the `.groups` argument.
entropy_by_year %>%
ggplot(., aes(Year, norm_entropy, color = instrument_type_broad)) +
geom_line(size = 0.7) +
scale_y_continuous("Normalized Shannon Entropy", limits = c(0, 1), labels = scales::percent) +
# geom_line(aes(y = log(n)), color = 'red') +
scale_x_continuous(limits = c(1993, 2027), breaks = c(1993, 1998, 2003, 2008, 2013, 2018, 2022)) +
scale_color_brewer(type = "qual", guide = "none", palette = 3) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(aes(label = gsub("^.*$", " ", instrument_type_broad)), # This will force the correct position of the link's right end.
data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
segment.curvature = -0.1,
segment.square = TRUE,
segment.color = 'grey',
box.padding = 0.1,
point.padding = 0.6,
nudge_x = 0.15,
nudge_y = 0.05,
force = 0.5,
hjust = 0,
direction="y",
na.rm = TRUE
) +
geom_text_repel(data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ",str_replace(instrument_type_broad, " Psychology", ""), " (n=", n_tests, ")")),
segment.alpha = 0, ## This will 'hide' the link
segment.curvature = -0.1,
segment.square = TRUE,
# segment.color = 'grey',
box.padding = 0.1,
point.padding = 0.6,
nudge_x = 0.15,
nudge_y = 0.05,
force = 0.5,
hjust = 0,
direction="y",
na.rm = TRUE)+
theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
Warning: Removed 31 rows containing missing values or values outside the scale range
(`geom_line()`).
test_frequency <- psyctests_info %>%
mutate(Test = if_else(test_type == "Original", DOI, original_test_DOI)) %>%
drop_na(Test) %>%
# filter(TestYear >= 1990) %>%
filter(between(Year, 1993, 2022)) %>%
group_by(subdiscipline_1, Test) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
arrange(n) %>%
mutate(decile = Hmisc::cut2(n, g = 10)) %>%
mutate(cumsum = cumsum(n),
sum = sum(n))
drop_na: removed 51,440 rows (24%), 166,701 rows remaining
`summarise()` has grouped output by 'subdiscipline_1'. You can override using the
`.groups` argument.
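# Hmisc::cut2(n, g = 10) bins measures into usage deciles within each
# subdiscipline; cumsum and sum give the running and total usage counts, from
# which each decile's share of recorded usage can be derived (see the
# commented-out summary below).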
#
# test_frequency %>%
# group_by(subdiscipline_1, decile) %>%
# summarise(share = sum(n)/first(sum),
# median_n = median(n),
# n_measures = n()) %>%
# View()
ggplot(test_frequency, aes(n)) +
stat_lorenz(desc = F) +
coord_fixed() +
geom_abline(linetype = "dashed") +
theme_minimal() +
hrbrthemes::scale_x_percent("Cumulative percentage of measures") +
hrbrthemes::scale_y_percent("Cumulative percentage of measure market share") #+
# hrbrthemes::theme_ipsum_rc()
ggplot(test_frequency, aes(n, color = subdiscipline_1)) +
stat_lorenz(desc = F) +
coord_fixed() +
geom_abline(linetype = "dashed") +
theme_minimal() +
hrbrthemes::scale_x_percent("Cumulative percentage of measures") +
hrbrthemes::scale_y_percent("Cumulative percentage of measure market share")
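# stat_lorenz() (presumably from the gglorenz package) draws the cumulative
# share of usages against the cumulative share of measures. A quick numeric
# companion to the same concentration idea -- a sketch, gini() is not part of
# the original script:
gini <- function(x) {
  x <- sort(x)
  n <- length(x)
  # Gini coefficient from the standard formula for values sorted ascending
  2 * sum(seq_len(n) * x) / (n * sum(x)) - (n + 1) / n
}
gini(test_frequency$n)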
constructs <- psyctests_info %>%
# filter(between(first_pub_year, 1950, 2015)) %>%
unnest(ConstructList) %>%
rowwise() %>%
mutate(construct = unlist(ConstructList)) %>%
select(-ConstructList) %>%
filter(between(Year, 1993, 2022)) %>%
drop_na(construct) %>%
mutate(survival = last_pub_year - first_pub_year,
survived_five = if_else(survival >= 5, T, F),
survived_ten = if_else(survival >= 10, T, F)) %>%
distinct(construct, .keep_all = TRUE)
drop_na: no rows removed
mean(constructs$survival, na.rm = T)
[1] 6.120457
sd(constructs$survival, na.rm = T)
[1] 8.788988
median(constructs$survival, na.rm = T)
[1] 2
max(constructs$survival, na.rm = T)
[1] 122
min(constructs$survival, na.rm = T)
[1] 0
mean(measures$survival, na.rm = T)
[1] 6.046785
sd(measures$survival, na.rm = T)
[1] 8.743927
median(measures$survival, na.rm = T)
[1] 2
max(measures$survival, na.rm = T)
[1] 122
min(measures$survival, na.rm = T)
[1] 0
cumsum_construct <- constructs %>%
arrange(TestYear) %>%
group_by(TestYear) %>%
summarise(constructs = n_distinct(DOI)) %>%
arrange(TestYear) %>%
mutate(constructs = cumsum(constructs))
cumsum_construct_survived_5 <- constructs %>%
filter(survived_five == T) %>%
arrange(TestYear) %>%
group_by(TestYear) %>%
summarise(constructs = n_distinct(DOI)) %>%
arrange(TestYear) %>%
mutate(constructs = cumsum(constructs))
cumsum_construct_survived_10 <- constructs %>%
filter(survived_ten == T) %>%
arrange(TestYear) %>%
group_by(TestYear) %>%
summarise(constructs = n_distinct(DOI)) %>%
arrange(TestYear) %>%
mutate(constructs = cumsum(constructs))
cumsums <- bind_rows(
"all constructs" = cumsum_construct,
"constructs in use\n for => 5 years" = cumsum_construct_survived_5,
"constructs in use\n for => 10 years" = cumsum_construct_survived_10,
.id = "origin"
) %>%
rename(Year = TestYear) %>%
filter(Year <= 2022)
ggplot(cumsums, aes(Year, constructs, color = origin)) +
geom_line() +
geom_vline(xintercept = 2016, linetype = 'dashed') +
scale_y_continuous("Cumulative number of constructs") +
scale_x_continuous("Publication year in APA PsycTests",
limits = c(1993, 2030),
breaks = seq(1993,2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022"),
expand = expansion(add = c(0, 1))) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(data = cumsums %>% drop_na() %>% group_by(origin) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ", origin, "\n (n = ", constructs, ")")),
segment.square = TRUE,
lineheight = .9,
segment.color = 'grey',
nudge_x = 1.2,
hjust = 0,
na.rm = TRUE)+
# theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none") +
guides(
x = guide_axis(cap = "both"), # Cap both ends
)
drop_na: no rows removed
Warning: Removed 187 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/cumsums_survival_all.pdf", width = 8, height = 4)
Warning: Removed 187 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/cumsums_survival_all.png", width = 8, height = 4)
Warning: Removed 187 rows containing missing values or values outside the scale range
(`geom_line()`).
first_constructs <- constructs %>%
distinct(first_construct, .keep_all = TRUE)
cumsum_construct <- first_constructs %>%
arrange(TestYear) %>%
group_by(TestYear) %>%
summarise(constructs = n_distinct(DOI)) %>%
arrange(TestYear) %>%
mutate(constructs = cumsum(constructs))
cumsum_construct_survived_5 <- first_constructs %>%
filter(survived_five == T) %>%
arrange(TestYear) %>%
group_by(TestYear) %>%
summarise(constructs = n_distinct(DOI)) %>%
arrange(TestYear) %>%
mutate(constructs = cumsum(constructs))
cumsum_construct_survived_10 <- first_constructs %>%
filter(survived_ten == T) %>%
arrange(TestYear) %>%
group_by(TestYear) %>%
summarise(constructs = n_distinct(DOI)) %>%
arrange(TestYear) %>%
mutate(constructs = cumsum(constructs))
cumsums <- bind_rows(
"all constructs" = cumsum_construct,
"constructs in use\n for => 5 years" = cumsum_construct_survived_5,
"constructs in use\n for => 10 years" = cumsum_construct_survived_10,
.id = "origin"
) %>%
rename(Year = TestYear) %>%
filter(Year <= 2022)
ggplot(cumsums, aes(Year, constructs, color = origin)) +
geom_line() +
geom_vline(xintercept = 2016, linetype = 'dashed') +
scale_y_continuous("Cumulative number of constructs") +
scale_x_continuous("Publication year in APA PsycTests",
limits = c(1993, 2030),
breaks = seq(1993,2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022"),
expand = expansion(add = c(0, 1))) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(data = cumsums %>% drop_na() %>% group_by(origin) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ", origin, "\n (n = ", constructs, ")")),
segment.square = TRUE,
lineheight = .9,
segment.color = 'grey',
nudge_x = 1.2,
hjust = 0,
na.rm = TRUE)+
# theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none") +
guides(
x = guide_axis(cap = "both"), # Cap both ends
)
drop_na: no rows removed
Warning: Removed 187 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/cumsums_survival_first.pdf", width = 8, height = 4)
Warning: Removed 187 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/cumsums_survival_first.png", width = 8, height = 4)
Warning: Removed 187 rows containing missing values or values outside the scale range
(`geom_line()`).
cumsum_all <- measures %>%
group_by(TestYear) %>%
summarise(tests = n()) %>%
arrange(TestYear) %>%
mutate(tests = cumsum(tests))
cumsum_5 <- measures %>%
filter(survived_five == T) %>%
group_by(TestYear) %>%
summarise(tests = n()) %>%
arrange(TestYear) %>%
mutate(tests = cumsum(tests))
cumsum_10 <- measures %>%
filter(survived_ten == T) %>%
arrange(TestYear) %>%
distinct(Name_base, .keep_all = T) %>%
group_by(TestYear) %>%
summarise(tests = n_distinct(DOI)) %>%
mutate(tests = cumsum(tests))
cumsums <- bind_rows(
"all measures" = cumsum_all,
"measures in use\n for => 5 years" = cumsum_5,
"measures in use\n for => 10 years" = cumsum_10,
.id = "origin"
) %>%
rename(Year = TestYear) %>%
filter(Year <= 2022)
ggplot(cumsums, aes(Year, tests, color = origin)) +
geom_line() +
geom_vline(xintercept = 2016, linetype = 'dashed') +
scale_y_continuous("Cumulative number of measures") +
scale_x_continuous("Publication year in APA PsycTests",
limits = c(1993, 2030),
breaks = seq(1993,2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022"),
expand = expansion(add = c(0, 1))) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(data = cumsums %>% drop_na() %>% group_by(origin) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ", origin, "\n (n = ", tests, ")")),
segment.square = TRUE,
lineheight = .9,
segment.color = 'grey',
nudge_x = 1.2,
hjust = 0,
na.rm = TRUE) +
# theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none") +
guides(
x = guide_axis(cap = "both"), # Cap both ends
)
drop_na: no rows removed
Warning: Removed 220 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/cumsums_survival_measures.pdf", width = 8, height = 4)
Warning: Removed 220 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/cumsums_survival_measures.png", width = 8, height = 4)
Warning: Removed 220 rows containing missing values or values outside the scale range
(`geom_line()`).
cumsum_construct <- constructs %>%
arrange(first_pub_year) %>%
group_by(first_pub_year) %>%
summarise(constructs = n_distinct(DOI)) %>%
arrange(first_pub_year) %>%
mutate(constructs = cumsum(constructs))
cumsum_construct_survived_5 <- constructs %>%
filter(survived_five == T) %>%
arrange(first_pub_year) %>%
group_by(first_pub_year) %>%
summarise(constructs = n_distinct(DOI)) %>%
arrange(first_pub_year) %>%
mutate(constructs = cumsum(constructs))
cumsum_construct_survived_10 <- constructs %>%
filter(survived_ten == T) %>%
arrange(first_pub_year) %>%
group_by(first_pub_year) %>%
summarise(constructs = n_distinct(DOI)) %>%
arrange(first_pub_year) %>%
mutate(constructs = cumsum(constructs))
cumsums <- bind_rows(
"all constructs" = cumsum_construct,
"constructs in use\n for => 5 years" = cumsum_construct_survived_5,
"constructs in use\n for => 10 years" = cumsum_construct_survived_10,
.id = "origin"
) %>%
rename(Year = first_pub_year) %>%
filter(Year <= 2022)
ggplot(cumsums, aes(Year, constructs, color = origin)) +
geom_line() +
geom_vline(xintercept = 2016, linetype = 'dashed') +
scale_y_continuous("Cumulative number of constructs") +
scale_x_continuous("First usage logged in PsycInfo",
limits = c(1993, 2030),
breaks = seq(1993,2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022"),
expand = expansion(add = c(0, 1))) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(data = cumsums %>% drop_na() %>% group_by(origin) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ", origin, "\n (n = ", constructs, ")")),
segment.square = TRUE,
lineheight = .9,
segment.color = 'grey',
nudge_x = 1.2,
hjust = 0,
na.rm = TRUE) +
# theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none") +
guides(
x = guide_axis(cap = "both"), # Cap both ends
)
drop_na: no rows removed
Warning: Removed 186 rows containing missing values or values outside the scale range
(`geom_line()`).
cumsum_construct <- constructs %>%
arrange(last_pub_year) %>%
group_by(last_pub_year) %>%
summarise(constructs = n_distinct(DOI)) %>%
arrange(last_pub_year) %>%
mutate(constructs = cumsum(constructs))
cumsum_construct_survived_5 <- constructs %>%
filter(survived_five == T) %>%
arrange(last_pub_year) %>%
group_by(last_pub_year) %>%
summarise(constructs = n_distinct(DOI)) %>%
arrange(last_pub_year) %>%
mutate(constructs = cumsum(constructs))
cumsum_construct_survived_10 <- constructs %>%
filter(survived_ten == T) %>%
arrange(last_pub_year) %>%
group_by(last_pub_year) %>%
summarise(constructs = n_distinct(DOI)) %>%
arrange(last_pub_year) %>%
mutate(constructs = cumsum(constructs))
cumsums <- bind_rows(
"all constructs" = cumsum_construct,
"constructs in use\n for => 5 years" = cumsum_construct_survived_5,
"constructs in use\n for => 10 years" = cumsum_construct_survived_10,
.id = "origin"
) %>%
rename(Year = last_pub_year) %>%
filter(Year <= 2022)
ggplot(cumsums, aes(Year, constructs, color = origin)) +
geom_line() +
geom_vline(xintercept = 2016, linetype = 'dashed') +
scale_y_continuous("Cumulative number of constructs") +
scale_x_continuous("Last usage logged in PsycInfo",
limits = c(1993, 2030),
breaks = seq(1993,2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022"),
expand = expansion(add = c(0, 1))) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(data = cumsums %>% drop_na() %>% group_by(origin) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ", origin, "\n (n = ", constructs, ")")),
segment.square = TRUE,
lineheight = .9,
segment.color = 'grey',
nudge_x = 1.2,
hjust = 0,
na.rm = TRUE) +
# theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none") +
guides(
x = guide_axis(cap = "both"), # Cap both ends
)
drop_na: no rows removed
count_construct <- constructs %>%
arrange(TestYear) %>%
group_by(TestYear) %>%
summarise(tests = n_distinct(construct)) %>%
arrange(TestYear)
count_construct_5 <- constructs %>%
filter(survived_five == T) %>%
arrange(TestYear) %>%
group_by(TestYear) %>%
summarise(tests = n_distinct(construct)) %>%
arrange(TestYear)
count_construct_10 <- constructs %>%
filter(survived_ten == T) %>%
arrange(TestYear) %>%
group_by(TestYear) %>%
summarise(tests = n_distinct(construct)) %>%
arrange(TestYear)
counts <- bind_rows(
"all constructs" = count_construct,
"constructs in use\n for => 5 years" = count_construct_5,
"constructs in use\n for => 10 years" = count_construct_10,
.id = "origin"
) %>%
rename(Year = TestYear) %>%
filter(Year <= 2022)
ggplot(counts, aes(Year, tests, color = origin)) +
geom_line() +
geom_vline(xintercept = 2016, linetype = 'dashed') +
scale_y_continuous("Number of constructs") +
scale_x_continuous("Publication year in APA PsycTests",
limits = c(1993, 2030),
breaks = seq(1993, 2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022")) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(data = counts %>% drop_na() %>% group_by(origin) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ", origin)),
segment.curvature = -0.1,
segment.square = TRUE,
lineheight = .9,
# segment.color = 'grey',
box.padding = 0.1,
point.padding = 0.6,
nudge_x = 1.5,
nudge_y = -15,
force = 1,
hjust = 0,
direction="y",
na.rm = F) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
drop_na: no rows removed
Warning: Removed 187 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/counts_survival.pdf", width = 8, height = 4)
Warning: Removed 187 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/counts_survival.png", width = 8, height = 4)
Warning: Removed 187 rows containing missing values or values outside the scale range
(`geom_line()`).
all_constructs_over_time <- records_wide %>% select(subdiscipline_1, DOI, TestYear, ConstructList) %>%
unnest(ConstructList) %>%
rowwise() %>%
mutate(construct = unlist(ConstructList)) %>%
select(-ConstructList)
cumsum_all_constructs <- all_constructs_over_time %>%
arrange(TestYear) %>%
distinct(construct, .keep_all = T) %>%
group_by(TestYear) %>%
summarise(constructs = n_distinct(construct)) %>%
arrange(TestYear) %>%
mutate(constructs = cumsum(constructs))
cumsum_construct <- records_wide %>%
arrange(TestYear) %>%
distinct(first_construct, .keep_all = T) %>%
group_by(TestYear) %>%
summarise(constructs = n_distinct(first_construct)) %>%
arrange(TestYear) %>%
mutate(constructs = cumsum(constructs))
cumsums <- bind_rows(
"first constructs" = cumsum_construct,
"all constructs" = cumsum_all_constructs,
.id = "origin"
) %>%
rename(Year = TestYear) %>%
filter(Year <= 2022)
ggplot(cumsums, aes(Year, constructs, color = origin)) +
geom_line() +
scale_y_continuous("Cumulative number of constructs") +
scale_x_continuous("Publication year in APA PsycTests",
limits = c(1993, 2027),
breaks = seq(1993, 2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022")) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(data = cumsums %>% drop_na() %>% group_by(origin) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ", origin, "\n (n = ", constructs, ")")),
segment.square = TRUE,
lineheight = .9,
segment.color = 'grey',
nudge_x = 1.2,
hjust = 0,
na.rm = TRUE)
drop_na: no rows removed
Warning: Removed 170 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/cumsum_all_vs_first.pdf", width = 8, height = 4)
Warning: Removed 170 rows containing missing values or values outside the scale range
(`geom_line()`).
ggsave("figures/cumsum_all_vs_first.png", width = 8, height = 4)
Warning: Removed 170 rows containing missing values or values outside the scale range
(`geom_line()`).
table(records_wide$constructs_n)
    1     2     3     4     5     6     7     8     9    10    12
56645 12421  2134   326    85    37    13    14     3     3     3
   15    16    17    21    22    23    25
    1     1     1     1     1     1     2
round(prop.table(table(records_wide$constructs_n)),2)
   1    2    3    4    5    6    7    8    9   10   12   15   16   17
0.79 0.17 0.03 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
  21   22   23   25
0.00 0.00 0.00 0.00
Expanding the entropy calculation to all coded constructs makes little difference.
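`calc_norm_entropy()` is used throughout but not defined in this excerpt. A minimal sketch consistent with its use here, assuming it rescales the Shannon entropy of the usage counts by the maximum attainable value log(K) for K categories (the `entropy(n, method = "MM") / log(n())` lines further below apply the same normalization to the Miller-Madow bias-corrected estimate):
library(entropy)  # provides entropy(), as used above
# Assumed helper (not shown in this file): Shannon entropy of usage counts,
# normalized to [0, 1] by its maximum log(K), K = number of categories.
# 1 = usage spread evenly across constructs; near 0 = one construct dominates.
calc_norm_entropy <- function(counts) {
  counts <- counts[counts > 0]
  if (length(counts) < 2) return(NA_real_)  # undefined for a single category
  entropy(counts) / log(length(counts))
}
calc_norm_entropy(c(10, 10, 10, 10))  # exactly 1 (perfectly even usage)
calc_norm_entropy(c(97, 1, 1, 1))     # well below 1 (heavily concentrated usage)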
all_constructs_over_time <- psyctests_info %>% select(subdiscipline_1, DOI, TestYear, Year, ConstructList, usage_count) %>%
unnest(ConstructList) %>%
rowwise() %>%
mutate(construct = unlist(ConstructList)) %>%
select(-ConstructList)
entropy_all_constructs <- all_constructs_over_time %>%
filter(between(Year, 1993, 2022)) %>%
drop_na(construct) %>%
group_by(Year, construct) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
ungroup() %>%
mutate(n_tests = n_distinct(construct)) %>%
group_by(n_tests, Year) %>%
filter(n > 0) %>%
summarise(entropy = entropy(n),
norm_entropy = calc_norm_entropy(n),
n = sum(n),
diff_tests = n()) %>%
ungroup()
drop_na: no rows removed
`summarise()` has grouped output by 'Year'. You can override using the `.groups`
argument.
`summarise()` has grouped output by 'n_tests'. You can override using the
`.groups` argument.
byconstruct_entropy_by_year <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
drop_na(first_construct) %>%
group_by(Year, Test = first_construct) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
ungroup() %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(n_tests, Year) %>%
filter(n > 0) %>%
summarise(entropy = entropy(n),
norm_entropy = calc_norm_entropy(n),
n = sum(n),
diff_tests = n()) %>%
ungroup()
drop_na: no rows removed
`summarise()` has grouped output by 'Year'. You can override using the `.groups`
argument.
`summarise()` has grouped output by 'n_tests'. You can override using the
`.groups` argument.
entropy_by_year <- bind_rows(# "all tests" = all_entropy_by_year,
"all constructs" = entropy_all_constructs,
# "by name base" = bybase_entropy_by_year,
"first constructs" = byconstruct_entropy_by_year,
.id = "version")
entropy_by_year %>%
ggplot(., aes(Year, norm_entropy, color = version)) +
geom_line(size = 0.7) +
scale_y_continuous("Normalized Shannon Entropy", limits = c(0, 1), labels = scales::percent) +
# geom_line(aes(y = log(n)), color = 'red') +
scale_x_continuous("Usage year as coded in APA PsycInfo", limits = c(1993, 2027),
breaks = seq(1993, 2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022")) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(data = entropy_by_year %>% drop_na() %>% group_by(version) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ",version)), # "\n (n = ", n_tests, ")"
segment.curvature = -0.5,
segment.square = TRUE,
segment.color = 'grey',
xlim = c(2023, 2030),
nudge_x = 1.14,
lineheight = .9,
hjust = 0,
direction="y",
na.rm = TRUE) +
theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
drop_na: no rows removed
byorig_entropy_by_year <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
mutate(Test = if_else(test_type == "Original", DOI, original_test_DOI)) %>%
drop_na(Test) %>%
group_by(Year, Test) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
ungroup() %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(n_tests, Year) %>%
filter(n > 0) %>%
summarise(norm_entropy = calc_norm_entropy(n),
norm_entropy_MM = entropy(n, method = "MM") / log(n()),
n = sum(n),
diff_tests = n()) %>%
ungroup()
drop_na: removed 45,635 rows (22%), 158,814 rows remaining
`summarise()` has grouped output by 'Year'. You can override using the `.groups`
argument.
`summarise()` has grouped output by 'n_tests'. You can override using the
`.groups` argument.
n_distinct(Test)
1 0.5517793
byconstruct_entropy_by_year <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
drop_na(first_construct) %>%
group_by(Year, Test = first_construct) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
ungroup() %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(n_tests, Year) %>%
filter(n > 0) %>%
summarise(norm_entropy = calc_norm_entropy(n),
norm_entropy_MM = entropy(n, method = "MM") / log(n()),
n = sum(n),
diff_tests = n()) %>%
ungroup()
drop_na: no rows removed
`summarise()` has grouped output by 'Year'. You can override using the `.groups`
argument.
`summarise()` has grouped output by 'n_tests'. You can override using the
`.groups` argument.
all_entropy_by_year <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
drop_na(DOI) %>%
group_by(Year, Test = DOI) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
ungroup() %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(n_tests, Year) %>%
filter(n > 0) %>%
summarise(norm_entropy = calc_norm_entropy(n),
norm_entropy_MM = entropy(n, method = "MM") / log(n()),
n = sum(n),
diff_tests = n()) %>%
ungroup()
drop_na: no rows removed
`summarise()` has grouped output by 'Year'. You can override using the `.groups`
argument.
`summarise()` has grouped output by 'n_tests'. You can override using the
`.groups` argument.
original_entropy_by_year <- psyctests_info %>%
filter(between(Year, 1993, 2022)) %>%
filter(test_type == "Original") %>%
group_by(Year, Test = DOI) %>%
summarise(n = sum(usage_count, na.rm = T)) %>%
ungroup() %>%
mutate(n_tests = n_distinct(Test)) %>%
group_by(n_tests, Year) %>%
filter(n > 0) %>%
summarise(norm_entropy = calc_norm_entropy(n),
norm_entropy_MM = entropy(n, method = "MM") / log(n()),
n = sum(n),
diff_tests = n()) %>%
ungroup()
`summarise()` has grouped output by 'Year'. You can override using the `.groups`
argument.
`summarise()` has grouped output by 'n_tests'. You can override using the
`.groups` argument.
entropy_by_year <- bind_rows(# "all tests" = all_entropy_by_year,
"measures" = original_entropy_by_year,
"with translations\n and revisions" = byorig_entropy_by_year,
# "by name base" = bybase_entropy_by_year,
"constructs" = byconstruct_entropy_by_year,
.id = "version")
plot_entropy <- entropy_by_year %>%
ggplot(., aes(Year, norm_entropy, color = version)) +
geom_line(size = 0.7, linetype = "dashed") +
geom_line(aes(y = norm_entropy_MM), size = 0.7) +
scale_y_continuous("Normalized Shannon Entropy", limits = c(0, 1), labels = scales::percent) +
# geom_line(aes(y = log(n)), color = 'red') +
scale_x_continuous("Usage year as coded in APA PsycInfo", limits = c(1993, 2027),
breaks = seq(1993, 2022, by = 1),
labels = c(1993, "", "", "", "", 1998, "", "", "", "", 2003, "", "", "", "", 2008, "", "", "", "", 2013, "", "", "", "", 2018, "", "", "", "2022")) +
# ggtitle(str_c(n_distinct(tests_by_year$Test), " measures tracked in PsycInfo")) +
# annotate("text", x = 1993, y = 1, label = "- each used once",
# size = 3.3, vjust = 0.3, hjust = 0.05) +
# annotate("text", x = 1993, y = 0, label = "- all used one",
# size = 3.3, vjust = 0.3, hjust = 0.05) +
scale_color_brewer(type = "qual", guide = "none", palette = 2) +
# ggrepel::geom_text_repel(
# aes(label = str_replace(subdiscipline, " Psychology", "")), data = entropy_by_year %>% filter(Year == max(Year, na.rm = T)),
# size = 4, hjust = 1,
# ) +
geom_text_repel(aes(label = str_replace_all(version, "[a-z=0-9/() ]+", " ")), # This will force the correct position of the link's right end.
data = entropy_by_year %>% drop_na() %>% group_by(version) %>% filter(Year == max(Year, na.rm = T)),
segment.curvature = -0.1,
segment.square = TRUE,
lineheight = .9,
segment.color = 'grey',
box.padding = 0.1,
point.padding = 0.6,
nudge_x = 1.15,
nudge_y = 0.03,
force = 0.9,
hjust = 0,
direction="y",
size = 3.3,
na.rm = TRUE) +
geom_text_repel(data = entropy_by_year %>% drop_na() %>% group_by(version) %>% filter(Year == max(Year, na.rm = T)),
aes(label = paste0(" ",version)), # "\n (n = ", n_tests, ")"
segment.alpha = 0, ## This will 'hide' the link
segment.curvature = -0.1,
segment.square = TRUE,
# segment.color = 'grey',
box.padding = 0.1,
point.padding = 0.6,
nudge_x = 1.15,
nudge_y = 0.0,
lineheight = .9,
force = 0.9,
size = 3.3,
hjust = 0,
direction="y",
na.rm = TRUE) +
theme_minimal(base_size = 13) +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none")
drop_na: no rows removed
drop_na: no rows removed
plot_entropy
entropy_by_class <- psyctests_info %>%
group_by(classification_1, DOI) %>%
summarise(n = sum(usage_count, na.rm = T),
parent = case_when(
n > 50 ~ "",
n > 20 ~ "used 21-50 times",
n > 5 ~ "used 6-20 times",
TRUE ~ "used 1-5 times")) %>%
group_by(classification_1) %>%
# filter(n > 0) %>%
summarise(
entropy = entropy(n),
norm_entropy = calc_norm_entropy(n)) %>%
arrange(norm_entropy)
`summarise()` has grouped output by 'classification_1'. You can override using
the `.groups` argument.
kable(entropy_by_class)
classification_1 | entropy | norm_entropy |
---|---|---|
Military Personnel, Adjustment, and Training | 2.383022 | 0.4815393 |
Anxiety and Depression | 3.193979 | 0.4847495 |
Aptitude and Achievement | 2.316471 | 0.5135321 |
Neuropsychological Assessment | 3.279933 | 0.5288051 |
Intelligence | 2.257675 | 0.5350566 |
General Assessment Tools | 2.473627 | 0.5371412 |
Functional Status and Adaptive Behavior | 3.530552 | 0.5868450 |
Trauma, Stress, and Coping | 4.183437 | 0.5982308 |
Emotional States, Emotional Responses, and Motivation | 4.248094 | 0.6077965 |
Mental Health/Illness Related Assessment | 4.840355 | 0.6092446 |
Addiction, Gambling, and Substance Abuse/Use | 4.368824 | 0.6220855 |
Cognitive Processes, Memory, and Decision Making | 4.430633 | 0.6357767 |
Sports, Recreation, and Leisure | 3.819379 | 0.6452859 |
Treatment, Rehabilitation, and Therapeutic Processes | 5.157328 | 0.6514703 |
Legal and Forensic Evaluation | 3.839622 | 0.6557771 |
Personality | 5.270944 | 0.6880603 |
Development and Aging | 5.170306 | 0.7010938 |
Physical Health/Illness Related Assessment | 5.519613 | 0.7074794 |
Perceptual, Motor, and Sensory Processing | 4.027230 | 0.7094247 |
Attitudes, Interests, Values, and Expectancies | 4.964110 | 0.7109935 |
Communication, Language, and Verbal Processing | 4.017755 | 0.7210416 |
Family Relationships and Parenting | 5.284498 | 0.7324078 |
Sex, Gender Roles, and Sexual Behavior | 4.922535 | 0.7417984 |
Social, Group, and Interpersonal Relationships | 5.558756 | 0.7492815 |
Religious and Political Beliefs | 4.667158 | 0.7656319 |
Consumer Behavior, Marketing, and Advertising | 4.587740 | 0.7679755 |
Culture, Racial, and Ethnic Identity | 4.787330 | 0.7693485 |
Human Factors and Environmental Engineering | 4.212094 | 0.7776987 |
Human-Computer Interaction | 4.726558 | 0.7843782 |
Organizational, Occupational, and Career Development | 6.103051 | 0.7895549 |
Education, Teaching, and Student Characteristics | 6.281884 | 0.7986947 |
psyctests_info %>%
group_by(instrument_type_broad, DOI) %>%
summarise(n = sum(usage_count, na.rm = T),
parent = case_when(
n > 50 ~ "",
n > 20 ~ "used 21-50 times",
n > 5 ~ "used 6-20 times",
TRUE ~ "used 1-5 times")) %>%
group_by(instrument_type_broad) %>%
filter(n > 0) %>%
summarise(
entropy = entropy(n),
norm_entropy = calc_norm_entropy(n)) %>%
arrange(norm_entropy) %>%
kable()
`summarise()` has grouped output by 'instrument_type_broad'. You can override
using the `.groups` argument.
instrument_type_broad | entropy | norm_entropy |
---|---|---|
other-rating | 4.454314 | 0.6201748 |
task | 4.041814 | 0.6228520 |
NA | 3.717207 | 0.6283096 |
test | 4.704202 | 0.6359652 |
questionnaire | 7.207948 | 0.7060498 |
[1] 1230
mean(same_name_tests$n)
[1] 2.44878
# A tibble: 20 × 2
name_psycinfo n
<chr> <int>
1 theory of planned behavior questionnaire 18
2 job satisfaction scale 15
3 self-efficacy scale 11
4 self-control scale 10
5 behavioral intentions measure 9
6 parental involvement scale 9
7 procedural justice scale 9
8 social distance scale 9
9 marital satisfaction measure 8
10 perceived behavioral control measure 8
11 religiosity measure 8
12 religiosity scale 8
13 social capital measure 8
14 social support scale 8
15 victimization measure 8
16 attribution questionnaire 7
17 delinquency measure 7
18 fear of crime scale 7
19 food frequency questionnaire 7
20 life satisfaction scale 7