Make upset plot for phyloseq object.

Alternative to venn plot.

Usage

upset_pq(
  physeq,
  fact,
  taxa_fill = NULL,
  min_nb_seq = 0,
  na_remove = TRUE,
  numeric_fonction = sum,
  rarefy_after_merging = FALSE,
  ...
)

Arguments

physeq: (required): a phyloseq-class object obtained using the phyloseq package.
fact: (required): Name of the factor to cluster samples by modalities. Need to be in physeq@sam_data.
taxa_fill: (default NULL) fill the ASV upset using a column in tax_table slot.
min_nb_seq: minimum number of sequences by OTUs by samples to take into count this OTUs in this sample. For example, if min_nb_seq=2,each value of 2 or less in the OTU table will not count in the venn diagram
na_remove: : if TRUE (the default), NA values in fact are removed if FALSE, NA values are set to "NA"
numeric_fonction: (default : sum) the function for numeric vector useful only for complex plot (see examples)
rarefy_after_merging: Rarefy each sample after merging by the modalities of fact parameter
...: Additional arguments passed on to the ComplexUpset::upset()

Value

A ggplot2 plot

Author

Adrien Taudière

Examples

if (requireNamespace("ComplexUpset")) {
  upset_pq(data_fungi_mini,
    fact = "Height", width_ratio = 0.2,
    taxa_fill = "Class"
  )
}
#> Loading required namespace: ComplexUpset
#> Cleaning suppress 0 taxa (  ) and 0 sample(s) (  ).
#> Number of non-matching ASV 0
#> Number of matching ASV 45
#> Number of filtered-out ASV 0
#> Number of kept ASV 45
#> Number of kept samples 137

# \donttest{
if (requireNamespace("ComplexUpset")) {
  upset_pq(data_fungi_mini, fact = "Height", min_nb_seq = 1000)
  upset_pq(data_fungi_mini, fact = "Height", na_remove = FALSE)

  upset_pq(data_fungi_mini, fact = "Time", width_ratio = 0.2, rarefy_after_merging = TRUE)

  upset_pq(
    data_fungi_mini,
    fact = "Time",
    width_ratio = 0.2,
    annotations = list(
      "Sequences per ASV \n (log10)" = (
        ggplot(mapping = aes(y = log10(Abundance)))
        +
          geom_jitter(aes(
            color =
              Abundance
          ), na.rm = TRUE)
          +
          geom_violin(alpha = 0.5, na.rm = TRUE) +
          theme(legend.key.size = unit(0.2, "cm")) +
          theme(axis.text = element_text(size = 12))
      ),
      "ASV per phylum" = (
        ggplot(mapping = aes(fill = Phylum))
        +
          geom_bar() +
          ylab("ASV per phylum") +
          theme(legend.key.size = unit(0.2, "cm")) +
          theme(axis.text = element_text(size = 12))
      )
    )
  )

  upset_pq(
    data_fungi_mini,
    fact = "Time",
    width_ratio = 0.2,
    numeric_fonction = mean,
    annotations = list(
      "Sequences per ASV \n (log10)" = (
        ggplot(mapping = aes(y = log10(Abundance)))
        +
          geom_jitter(aes(
            color =
              Abundance
          ), na.rm = TRUE)
          +
          geom_violin(alpha = 0.5, na.rm = TRUE) +
          theme(legend.key.size = unit(0.2, "cm")) +
          theme(axis.text = element_text(size = 12))
      ),
      "ASV per phylum" = (
        ggplot(mapping = aes(fill = Phylum))
        +
          geom_bar() +
          ylab("ASV per phylum") +
          theme(legend.key.size = unit(0.2, "cm")) +
          theme(axis.text = element_text(size = 12))
      )
    )
  )

  upset_pq(
    subset_taxa(data_fungi_mini, Phylum == "Basidiomycota"),
    fact = "Time",
    width_ratio = 0.2,
    base_annotations = list(),
    annotations = list(
      "Sequences per ASV \n (log10)" = (
        ggplot(mapping = aes(y = log10(Abundance)))
        +
          geom_jitter(aes(
            color =
              Abundance
          ), na.rm = TRUE)
          +
          geom_violin(alpha = 0.5, na.rm = TRUE) +
          theme(legend.key.size = unit(0.2, "cm")) +
          theme(axis.text = element_text(size = 12))
      ),
      "ASV per phylum" = (
        ggplot(mapping = aes(fill = Class))
        +
          geom_bar() +
          ylab("ASV per Class") +
          theme(legend.key.size = unit(0.2, "cm")) +
          theme(axis.text = element_text(size = 12))
      )
    )
  )

  data_fungi2 <- data_fungi_mini
  data_fungi2@sam_data[["Time_0"]] <- data_fungi2@sam_data$Time == 0
  data_fungi2@sam_data[["Height__Time_0"]] <-
    paste0(data_fungi2@sam_data[["Height"]], "__", data_fungi2@sam_data[["Time_0"]])
  data_fungi2@sam_data[["Height__Time_0"]][grepl("NA", data_fungi2@sam_data[["Height__Time_0"]])] <-
    NA
  upset_pq(data_fungi2, fact = "Height__Time_0", width_ratio = 0.2, min_size = 2)
}
#> Cleaning suppress 0 taxa (  ) and 0 sample(s) (  ).
#> Number of non-matching ASV 0
#> Number of matching ASV 45
#> Number of filtered-out ASV 0
#> Number of kept ASV 45
#> Number of kept samples 137
#> Cleaning suppress 0 taxa (  ) and 0 sample(s) (  ).
#> Number of non-matching ASV 0
#> Number of matching ASV 45
#> Number of filtered-out ASV 0
#> Number of kept ASV 45
#> Number of kept samples 137
#> Cleaning suppress 0 taxa (  ) and 0 sample(s) (  ).
#> Number of non-matching ASV 0
#> Number of matching ASV 45
#> Number of filtered-out ASV 0
#> Number of kept ASV 45
#> Number of kept samples 137
#> You set `rngseed` to FALSE. Make sure you've set & recorded
#>  the random seed of your session for reproducibility.
#> See `?set.seed`
#> ...
#> At least one sample name start with a zero.
#>     That can be a problem for some phyloseq functions such as
#>     plot_bar and psmelt.
#> Cleaning suppress 0 taxa (  ) and 0 sample(s) (  ).
#> Number of non-matching ASV 0
#> Number of matching ASV 45
#> Number of filtered-out ASV 0
#> Number of kept ASV 45
#> Number of kept samples 137
#> Cleaning suppress 0 taxa (  ) and 0 sample(s) (  ).
#> Number of non-matching ASV 0
#> Number of matching ASV 45
#> Number of filtered-out ASV 0
#> Number of kept ASV 45
#> Number of kept samples 137
#> Cleaning suppress 0 taxa (  ) and 0 sample(s) (  ).
#> Number of non-matching ASV 0
#> Number of matching ASV 45
#> Number of filtered-out ASV 0
#> Number of kept ASV 45
#> Number of kept samples 137
#> Cleaning suppress 0 taxa (  ) and 0 sample(s) (  ).
#> Number of non-matching ASV 0
#> Number of matching ASV 45
#> Number of filtered-out ASV 0
#> Number of kept ASV 45
#> Number of kept samples 137

# }

Usage

Arguments

Value

See also

Author

Examples