Extract spore size information from mycoDB (https://www.mycodb.fr/).
Usage
tax_spores_size_pq(
physeq = NULL,
taxnames = NULL,
taxonomic_rank = "currentCanonicalSimple",
verbose = TRUE,
time_to_sleep = 0.5,
add_to_phyloseq = NULL,
col_prefix = NULL,
discard_genus_alone = taxonomic_rank == "currentCanonicalSimple",
discard_NA = TRUE
)
Arguments
- physeq
(optional) A phyloseq object. Either `physeq` or `taxnames` must be provided, but not both.
- taxnames
(optional) A character vector of taxonomic names.
- taxonomic_rank
(Character, default "currentCanonicalSimple") The column(s) present in the @tax_table slot of the phyloseq object. Can be a vector of two columns (e.g. c("Genus", "Species")).
- verbose
(logical, default TRUE) If TRUE, print informative messages while running.
- time_to_sleep
(numeric, default 0.5) Time to sleep between two queries to mycoDB, in seconds.
- add_to_phyloseq
(logical, default NULL) If TRUE, add the spore size information to the phyloseq object. If FALSE, return a data.frame. If NULL (default), add to phyloseq if `physeq` is provided, else return a data.frame.
- col_prefix
(character, default NULL) If not NULL, prefix to add to the new columns added to the phyloseq object.
Value
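If `add_to_phyloseq` is TRUE, returns a phyloseq object with new columns in the tax_table slot: `spore_size`, `spore_length`, `spore_width`. If `add_to_phyloseq` is FALSE, returns a data.frame with columns `taxa_name`, `spore_size`, `spore_length`, `spore_width`. If `add_to_phyloseq` is NULL (the default), a phyloseq object is returned when `physeq` is provided and a data.frame otherwise. Note: the examples below read the columns `spore_length_mean` and `spore_width_mean` from the result, so the column names listed here should be checked against the function's actual output.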
Examples
if (FALSE) { # \dontrun{
  # Run the name verification step, then extract spore sizes from mycoDB.
  data_fungi_mini_cleanNames <- data_fungi_mini |>
    gna_verifier_pq()
  data_fungi_mini_spore_size <- tax_spores_size_pq(data_fungi_mini_cleanNames)

  # Example 1: spore length against spore width, one point per taxon.
  psmelt(data_fungi_mini_spore_size) |>
    group_by(taxa_name) |>
    summarise(
      spore_length = as.numeric(unique(spore_length_mean)),
      spore_width = as.numeric(unique(spore_width_mean)),
      # Count the non-zero rows BEFORE overwriting `Abundance`: inside
      # summarise(), later expressions see the summary columns created
      # earlier, so counting after the sum would always give 0 or 1.
      Occurrence = sum(Abundance > 0, na.rm = TRUE),
      Abundance = sum(Abundance)
    ) |>
    ggplot(aes(
      x = spore_length, y = spore_width,
      size = Abundance, col = Occurrence
    )) +
    geom_point(alpha = 0.7) +
    ggrepel::geom_text_repel(aes(label = taxa_name),
      vjust = -0.5,
      size = 3,
      fontface = "italic",
      min.segment.length = 0.2,
      force = 4
    ) +
    labs(
      title = "Spore sizes extracted from mycoDB",
      x = "Spore length (\u00b5m)",
      y = "Spore width (\u00b5m)",
      col = "Number of samples",
      size = "Number of sequences"
    ) +
    theme_idest()

  # Example with ellipses
  psmelt(data_fungi_mini_spore_size) |>
    filter(!is.na(taxa_name) & taxa_name != "") |>
    filter(!is.na(Time)) |>
    filter(Abundance > 0) |>
    mutate(taxa_name = as.factor(taxa_name)) |>
    group_by(taxa_name, Time) |>
    summarise(
      # The length is pre-scaled by 0.2 so that, in the aesthetics below,
      # a = spore_length / 2 and b = spore_width / 2 / 5 are both one tenth
      # of the measured dimension.
      spore_length = 0.2 * as.numeric(unique(spore_length_mean)),
      spore_width = as.numeric(unique(spore_width_mean)),
      # Same ordering constraint as above: count before summing.
      Occurrence = sum(Abundance > 0, na.rm = TRUE),
      Abundance = sum(Abundance),
      Order = unique(Order),
      .groups = "drop"
    ) |>
    arrange(desc(Abundance)) |>
    mutate(
      # Integer code of the factor level, used as the vertical position.
      taxa_name_num = as.numeric(taxa_name)
    ) |>
    filter(!is.na(spore_length)) |>
    ggplot(aes(
      x0 = log(Abundance), y0 = taxa_name_num / 5,
      # Semi-major axis from the length, semi-minor axis from the width.
      a = spore_length / 2, b = spore_width / 2 / 5, fill = Order
    )) +
    coord_fixed() +
    ggforce::geom_ellipse(aes(angle = 0), alpha = 0.3) +
    ggrepel::geom_text_repel(aes(
      x = log(Abundance), y = taxa_name_num / 5,
      label = taxa_name, color = Order
    ), size = 2) +
    theme_idest() +
    theme(axis.text.y = element_blank()) +
    labs(x = "Number of sequences (log scale)", y = "Taxa") +
    facet_wrap(~Time, ncol = 2)

  # Test for difference in mean spore length between sample's factor
  psmelt(data_fungi_mini_spore_size) |>
    filter(!is.na(taxa_name) & taxa_name != "") |>
    filter(!is.na(spore_length_mean)) |>
    filter(!is.na(Time)) |>
    filter(Abundance > 0) |>
    mutate(taxa_name = as.factor(taxa_name)) |>
    group_by(taxa_name, Time) |>
    summarise(
      spore_length = unique(as.numeric(spore_length_mean)),
      spore_width = unique(as.numeric(spore_width_mean)),
      Order = unique(Order),
      .groups = "drop"
    ) |>
    ggstatsplot::ggbetweenstats(Time, spore_length)
} # }