Import

Show code
# Paths of the eight raw XML exports: PTMain_01.xml ... PTMain_08.xml.
records_paths <- sprintf("../sober_rubric/raw_data/PTMain_%02d.xml", 1:8)

# Read each file, convert the XML document to a nested list, then to a tibble.
# The steps below rely on each tibble having a list-column named `PTXML`.
# NOTE(review): the numbered intermediates (records_data_1..8 and
# records_list_1..8) are no longer created; confirm nothing later in the
# file refers to them.
records_list <- lapply(records_paths, function(path) {
  read_xml(path) %>%
    as_list() %>%
    as_tibble()
})

records_wide <- bind_rows(records_list) %>%
  # remove meta-data rows (Owner and Summary): keep only entries that carry
  # a "DOI" element. vapply() guarantees a logical vector even when the
  # input is empty, which sapply() does not.
  filter(vapply(PTXML, function(x) "DOI" %in% names(x), logical(1))) %>%
  # unnest the records to get the higher level variables
  unnest_wider(PTXML)

save(records_wide, file = "../sober_rubric/raw_data/records_wide.rda")