Skip to contents

Extract spore size information from mycoDB (https://www.mycodb.fr/).

Usage

tax_spores_size_pq(
  physeq = NULL,
  taxnames = NULL,
  taxonomic_rank = "currentCanonicalSimple",
  verbose = TRUE,
  time_to_sleep = 0.5,
  add_to_phyloseq = NULL,
  col_prefix = NULL,
  discard_genus_alone = taxonomic_rank == "currentCanonicalSimple",
  discard_NA = TRUE
)

Arguments

physeq

(optional) A phyloseq object. Either `physeq` or `taxnames` must be provided, but not both.

taxnames

(optional) A character vector of taxonomic names.

taxonomic_rank

(Character, default "currentCanonicalSimple") The column(s) present in the @tax_table slot of the phyloseq object. Can be a vector of two columns (e.g. c("Genus", "Species")).

verbose

(logical, default TRUE) If TRUE, print informative messages.

time_to_sleep

(numeric, default 0.5) Time to sleep between two queries to mycoDB, in seconds.

add_to_phyloseq

(logical, default NULL) If TRUE, add the spore size information to the phyloseq object. If FALSE, return a data.frame. If NULL (default), add to phyloseq if `physeq` is provided, else return a data.frame.

col_prefix

(character, default NULL) If not NULL, prefix to add to the new columns added to the phyloseq object.

Value

If `add_to_phyloseq` is TRUE (or NULL with `physeq` provided), returns a phyloseq object with new columns in the tax_table slot: `spore_size`, `spore_length`, `spore_width`. If `add_to_phyloseq` is FALSE (or NULL with only `taxnames` provided), returns a data.frame with columns `taxa_name`, `spore_size`, `spore_length`, `spore_width`.

See also

[extract_spores_mycodb()]

Author

Adrien Taudiere

Examples

if (FALSE) { # \dontrun{
# Build the input: run gna_verifier_pq() on the example dataset, then query
# spore sizes for the resulting phyloseq object.
data_fungi_mini_cleanNames <- data_fungi_mini |>
  gna_verifier_pq()
data_fungi_mini_spore_size <- tax_spores_size_pq(data_fungi_mini_cleanNames)

# Example with points: spore length against spore width, one point per taxon
psmelt(data_fungi_mini_spore_size) |>
  group_by(taxa_name) |>
  summarise(
    spore_length = as.numeric(unique(spore_length_mean)),
    spore_width = as.numeric(unique(spore_width_mean)),
    # Occurence must be computed BEFORE Abundance is overwritten: summarise()
    # evaluates later expressions against newly created columns, so counting
    # after the sum would test a single total and always give 0 or 1.
    Occurence = sum(Abundance > 0, na.rm = TRUE),
    Abundance = sum(Abundance)
  ) |>
  ggplot(aes(x = spore_length, y = spore_width, size = Abundance, col = Occurence)) +
  geom_point(alpha = 0.7) +
  ggrepel::geom_text_repel(aes(label = taxa_name),
    vjust = -0.5,
    size = 3,
    fontface = "italic",
    min.segment.length = 0.2,
    force = 4
  ) +
  labs(
    title = "Spore sizes extracted from mycoDB",
    x = "Spore length (\u00b5m)",
    y = "Spore width (\u00b5m)",
    col = "Number of samples",
    size = "Number of sequences"
  ) +
  theme_idest()

# Example with ellipses: one ellipse per taxon and Time level, placed at
# log(Abundance) on the x axis and at the taxon's factor index on the y axis
psmelt(data_fungi_mini_spore_size) |>
  filter(!is.na(taxa_name) & !taxa_name == "") |>
  filter(!is.na(Time)) |>
  filter(Abundance > 0) |>
  mutate(taxa_name = as.factor(taxa_name)) |>
  group_by(taxa_name, Time) |>
  summarise(
    # 0.2 rescales the length; the width gets the same factor below (/ 5)
    spore_length = 0.2 * as.numeric(unique(spore_length_mean)),
    spore_width = as.numeric(unique(spore_width_mean)),
    # Count occurrences before Abundance is replaced by its group total
    Occurence = sum(Abundance > 0, na.rm = TRUE),
    Abundance = sum(Abundance),
    Order = unique(Order)
  ) |>
  arrange(desc(Abundance)) |>
  mutate(
    taxa_name_num = as.numeric(taxa_name)
  ) |>
  # Both radii are needed to draw an ellipse
  filter(!is.na(spore_length) & !is.na(spore_width)) |>
  ggplot(aes(
    x0 = log(Abundance), y0 = taxa_name_num / 5,
    # Semi-major axis from the spore length, semi-minor axis from the spore
    # width, so that each ellipse reflects the shape of the spore
    a = spore_length / 2, b = spore_width / 2 / 5, fill = Order
  )) +
  coord_fixed() +
  ggforce::geom_ellipse(aes(angle = 0), alpha = 0.3) +
  ggrepel::geom_text_repel(aes(
    x = log(Abundance), y = taxa_name_num / 5,
    label = taxa_name, color = Order
  ), size = 2) +
  theme_idest() +
  theme(axis.text.y = element_blank()) +
  labs(x = "Number of sequences (log scale)", y = "Taxa") +
  facet_wrap(~Time, ncol = 2)


# Test for difference in mean spore length between sample's factor
psmelt(data_fungi_mini_spore_size) |>
  filter(!is.na(taxa_name) & !taxa_name == "") |>
  filter(!is.na(spore_length_mean)) |>
  filter(!is.na(Time)) |>
  filter(Abundance > 0) |>
  mutate(taxa_name = as.factor(taxa_name)) |>
  group_by(taxa_name, Time) |>
  summarise(
    spore_length = unique(as.numeric(spore_length_mean)),
    spore_width = unique(as.numeric(spore_width_mean)),
    Order = unique(Order)
  ) |>
  # Compare spore_length between the levels of Time
  ggstatsplot::ggbetweenstats(Time, spore_length)
} # }