── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.0 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Attaching package: ‘ape’
The following object is masked from ‘package:dplyr’:
where
workflow
Full disclosure: most of the code was written by ChatGPT, with some natural language input from me.
Here are some helper function, whose purpose should be clear from their names.
# Helper function to extract latter half of treesget_latter_half <-function(tree_list) { purrr::map(tree_list, ~ .x[round(length(.x) /2):length(.x)])}# Helper function to remove dummy taxa if it existsremove_dummy <-function(tree) {if ("dummy"%in% tree$tip.label) {drop.tip(tree, "dummy") } else { tree }}# Check file completionis_file_complete <-function(file) {if (file_exists(file)) { last_line <-read_lines(file) %>%tail(1)return(str_detect(last_line, "end;")) } return(FALSE)}
Now the posterior tree changes are extracted, cleaned and sampled.
# Get the list of family namesfamilies <-dir_ls("mrbayes/output") %>% purrr::map_chr(fs::path_file) %>%str_remove("\\..*") %>%unique() %>% purrr::keep(str_detect, "^0")# Check if all files in the family have completedfamilies_completed <- purrr::map_lgl(families, function(fm) { files <- glue::glue("mrbayes/output/{fm}.run{1:2}.t")all(purrr::map_lgl(files, is_file_complete))})names(families_completed) <- families# Extract and process treesfm_trees <- purrr::map(families, function(fm) {print(fm) files <- glue::glue("mrbayes/output/{fm}.run{1:2}.t")if (families_completed[fm]) { trees <- purrr::map(files, read.nexus) } else { trees_as_string <- purrr::map(files, function(file) { tree_string <-readLines(file, warn =FALSE)c(tree_string, "end;") }) trees <- purrr::map(trees_as_string, ~read.nexus(textConnection(.x))) } latter_half_trees <-do.call(c, get_latter_half(trees))if (length(latter_half_trees) <100) {stop(glue("Not enough trees in {fm}. Expected at least 100 but found {length(latter_half_trees)}.")) } purrr::map(sample(latter_half_trees, 100), remove_dummy)})names(fm_trees) <- families